pa.c (hppa_register_move_cost, [...]): New.
[gcc.git] / gcc / config / pa / pa.c
1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "insn-attr.h"
33 #include "flags.h"
34 #include "tree.h"
35 #include "output.h"
36 #include "except.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "reload.h"
40 #include "integrate.h"
41 #include "function.h"
42 #include "diagnostic-core.h"
43 #include "toplev.h"
44 #include "ggc.h"
45 #include "recog.h"
46 #include "predict.h"
47 #include "tm_p.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "df.h"
51
52 /* Return nonzero if there is a bypass for the output of
53 OUT_INSN and the fp store IN_INSN. */
54 int
55 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
56 {
57 enum machine_mode store_mode;
58 enum machine_mode other_mode;
59 rtx set;
60
61 if (recog_memoized (in_insn) < 0
62 || (get_attr_type (in_insn) != TYPE_FPSTORE
63 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
64 || recog_memoized (out_insn) < 0)
65 return 0;
66
67 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
68
69 set = single_set (out_insn);
70 if (!set)
71 return 0;
72
73 other_mode = GET_MODE (SET_SRC (set));
74
75 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
76 }
77
78
79 #ifndef DO_FRAME_NOTES
80 #ifdef INCOMING_RETURN_ADDR_RTX
81 #define DO_FRAME_NOTES 1
82 #else
83 #define DO_FRAME_NOTES 0
84 #endif
85 #endif
86
87 static void pa_option_override (void);
88 static void copy_reg_pointer (rtx, rtx);
89 static void fix_range (const char *);
90 static bool pa_handle_option (size_t, const char *, int);
91 static int hppa_register_move_cost (enum machine_mode mode, reg_class_t,
92 reg_class_t);
93 static int hppa_address_cost (rtx, bool);
94 static bool hppa_rtx_costs (rtx, int, int, int *, bool);
95 static inline rtx force_mode (enum machine_mode, rtx);
96 static void pa_reorg (void);
97 static void pa_combine_instructions (void);
98 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
99 static bool forward_branch_p (rtx);
100 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
101 static int compute_movmem_length (rtx);
102 static int compute_clrmem_length (rtx);
103 static bool pa_assemble_integer (rtx, unsigned int, int);
104 static void remove_useless_addtr_insns (int);
105 static void store_reg (int, HOST_WIDE_INT, int);
106 static void store_reg_modify (int, int, HOST_WIDE_INT);
107 static void load_reg (int, HOST_WIDE_INT, int);
108 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
109 static rtx pa_function_value (const_tree, const_tree, bool);
110 static rtx pa_libcall_value (enum machine_mode, const_rtx);
111 static bool pa_function_value_regno_p (const unsigned int);
112 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
113 static void update_total_code_bytes (unsigned int);
114 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
115 static int pa_adjust_cost (rtx, rtx, rtx, int);
116 static int pa_adjust_priority (rtx, int);
117 static int pa_issue_rate (void);
118 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
119 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
120 ATTRIBUTE_UNUSED;
121 static void pa_encode_section_info (tree, rtx, int);
122 static const char *pa_strip_name_encoding (const char *);
123 static bool pa_function_ok_for_sibcall (tree, tree);
124 static void pa_globalize_label (FILE *, const char *)
125 ATTRIBUTE_UNUSED;
126 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
127 HOST_WIDE_INT, tree);
128 #if !defined(USE_COLLECT2)
129 static void pa_asm_out_constructor (rtx, int);
130 static void pa_asm_out_destructor (rtx, int);
131 #endif
132 static void pa_init_builtins (void);
133 static rtx hppa_builtin_saveregs (void);
134 static void hppa_va_start (tree, rtx);
135 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
136 static bool pa_scalar_mode_supported_p (enum machine_mode);
137 static bool pa_commutative_p (const_rtx x, int outer_code);
138 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
139 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
140 static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
141 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
142 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
143 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
144 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
145 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
146 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
147 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
148 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
149 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
150 static void output_deferred_plabels (void);
151 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
152 #ifdef ASM_OUTPUT_EXTERNAL_REAL
153 static void pa_hpux_file_end (void);
154 #endif
155 #ifdef HPUX_LONG_DOUBLE_LIBRARY
156 static void pa_hpux_init_libfuncs (void);
157 #endif
158 static rtx pa_struct_value_rtx (tree, int);
159 static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
160 const_tree, bool);
161 static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
162 tree, bool);
163 static struct machine_function * pa_init_machine_status (void);
164 static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
165 enum machine_mode,
166 secondary_reload_info *);
167 static void pa_extra_live_on_entry (bitmap);
168 static enum machine_mode pa_promote_function_mode (const_tree,
169 enum machine_mode, int *,
170 const_tree, int);
171
172 static void pa_asm_trampoline_template (FILE *);
173 static void pa_trampoline_init (rtx, tree, rtx);
174 static rtx pa_trampoline_adjust_address (rtx);
175 static rtx pa_delegitimize_address (rtx);
176 static bool pa_print_operand_punct_valid_p (unsigned char);
177
178 /* The following extra sections are only used for SOM. */
179 static GTY(()) section *som_readonly_data_section;
180 static GTY(()) section *som_one_only_readonly_data_section;
181 static GTY(()) section *som_one_only_data_section;
182
183 /* Which cpu we are scheduling for. */
184 enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;
185
186 /* The UNIX standard to use for predefines and linking. */
187 int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;
188
189 /* Counts for the number of callee-saved general and floating point
190 registers which were saved by the current function's prologue. */
191 static int gr_saved, fr_saved;
192
193 /* Boolean indicating whether the return pointer was saved by the
194 current function's prologue. */
195 static bool rp_saved;
196
197 static rtx find_addr_reg (rtx);
198
199 /* Keep track of the number of bytes we have output in the CODE subspace
200 during this compilation so we'll know when to emit inline long-calls. */
201 unsigned long total_code_bytes;
202
203 /* The last address of the previous function plus the number of bytes in
204 associated thunks that have been output. This is used to determine if
205 a thunk can use an IA-relative branch to reach its target function. */
206 static unsigned int last_address;
207
208 /* Variables to handle plabels that we discover are necessary at assembly
209 output time. They are output after the current function. */
210 struct GTY(()) deferred_plabel
211 {
212 rtx internal_label;
213 rtx symbol;
214 };
215 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
216 deferred_plabels;
217 static size_t n_deferred_plabels = 0;
218
219 \f
220 /* Initialize the GCC target structure. */
221
222 #undef TARGET_OPTION_OVERRIDE
223 #define TARGET_OPTION_OVERRIDE pa_option_override
224
225 #undef TARGET_ASM_ALIGNED_HI_OP
226 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
227 #undef TARGET_ASM_ALIGNED_SI_OP
228 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
229 #undef TARGET_ASM_ALIGNED_DI_OP
230 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
231 #undef TARGET_ASM_UNALIGNED_HI_OP
232 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
233 #undef TARGET_ASM_UNALIGNED_SI_OP
234 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
235 #undef TARGET_ASM_UNALIGNED_DI_OP
236 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
237 #undef TARGET_ASM_INTEGER
238 #define TARGET_ASM_INTEGER pa_assemble_integer
239
240 #undef TARGET_ASM_FUNCTION_PROLOGUE
241 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
242 #undef TARGET_ASM_FUNCTION_EPILOGUE
243 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
244
245 #undef TARGET_FUNCTION_VALUE
246 #define TARGET_FUNCTION_VALUE pa_function_value
247 #undef TARGET_LIBCALL_VALUE
248 #define TARGET_LIBCALL_VALUE pa_libcall_value
249 #undef TARGET_FUNCTION_VALUE_REGNO_P
250 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
251
252 #undef TARGET_LEGITIMIZE_ADDRESS
253 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
254
255 #undef TARGET_SCHED_ADJUST_COST
256 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
257 #undef TARGET_SCHED_ADJUST_PRIORITY
258 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
259 #undef TARGET_SCHED_ISSUE_RATE
260 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
261
262 #undef TARGET_ENCODE_SECTION_INFO
263 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
264 #undef TARGET_STRIP_NAME_ENCODING
265 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
266
267 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
268 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
269
270 #undef TARGET_COMMUTATIVE_P
271 #define TARGET_COMMUTATIVE_P pa_commutative_p
272
273 #undef TARGET_ASM_OUTPUT_MI_THUNK
274 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
275 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
276 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
277
278 #undef TARGET_ASM_FILE_END
279 #ifdef ASM_OUTPUT_EXTERNAL_REAL
280 #define TARGET_ASM_FILE_END pa_hpux_file_end
281 #else
282 #define TARGET_ASM_FILE_END output_deferred_plabels
283 #endif
284
285 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
286 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
287
288 #if !defined(USE_COLLECT2)
289 #undef TARGET_ASM_CONSTRUCTOR
290 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
291 #undef TARGET_ASM_DESTRUCTOR
292 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
293 #endif
294
295 #undef TARGET_DEFAULT_TARGET_FLAGS
296 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
297 #undef TARGET_HANDLE_OPTION
298 #define TARGET_HANDLE_OPTION pa_handle_option
299
300 #undef TARGET_INIT_BUILTINS
301 #define TARGET_INIT_BUILTINS pa_init_builtins
302
303 #undef TARGET_REGISTER_MOVE_COST
304 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
305 #undef TARGET_RTX_COSTS
306 #define TARGET_RTX_COSTS hppa_rtx_costs
307 #undef TARGET_ADDRESS_COST
308 #define TARGET_ADDRESS_COST hppa_address_cost
309
310 #undef TARGET_MACHINE_DEPENDENT_REORG
311 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
312
313 #ifdef HPUX_LONG_DOUBLE_LIBRARY
314 #undef TARGET_INIT_LIBFUNCS
315 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
316 #endif
317
318 #undef TARGET_PROMOTE_FUNCTION_MODE
319 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
320 #undef TARGET_PROMOTE_PROTOTYPES
321 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
322
323 #undef TARGET_STRUCT_VALUE_RTX
324 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
325 #undef TARGET_RETURN_IN_MEMORY
326 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
327 #undef TARGET_MUST_PASS_IN_STACK
328 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
329 #undef TARGET_PASS_BY_REFERENCE
330 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
331 #undef TARGET_CALLEE_COPIES
332 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
333 #undef TARGET_ARG_PARTIAL_BYTES
334 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
335
336 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
337 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
338 #undef TARGET_EXPAND_BUILTIN_VA_START
339 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
340 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
341 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
342
343 #undef TARGET_SCALAR_MODE_SUPPORTED_P
344 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
345
346 #undef TARGET_CANNOT_FORCE_CONST_MEM
347 #define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p
348
349 #undef TARGET_SECONDARY_RELOAD
350 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
351
352 #undef TARGET_EXTRA_LIVE_ON_ENTRY
353 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
354
355 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
356 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
357 #undef TARGET_TRAMPOLINE_INIT
358 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
359 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
360 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
361 #undef TARGET_DELEGITIMIZE_ADDRESS
362 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
363
364 struct gcc_target targetm = TARGET_INITIALIZER;
365 \f
366 /* Parse the -mfixed-range= option string. */
367
368 static void
369 fix_range (const char *const_str)
370 {
371 int i, first, last;
372 char *str, *dash, *comma;
373
374 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
375 REG2 are either register names or register numbers. The effect
376 of this option is to mark the registers in the range from REG1 to
377 REG2 as ``fixed'' so they won't be used by the compiler. This is
378 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
379
380 i = strlen (const_str);
381 str = (char *) alloca (i + 1);
382 memcpy (str, const_str, i + 1);
383
384 while (1)
385 {
386 dash = strchr (str, '-');
387 if (!dash)
388 {
389 warning (0, "value of -mfixed-range must have form REG1-REG2");
390 return;
391 }
392 *dash = '\0';
393
394 comma = strchr (dash + 1, ',');
395 if (comma)
396 *comma = '\0';
397
398 first = decode_reg_name (str);
399 if (first < 0)
400 {
401 warning (0, "unknown register name: %s", str);
402 return;
403 }
404
405 last = decode_reg_name (dash + 1);
406 if (last < 0)
407 {
408 warning (0, "unknown register name: %s", dash + 1);
409 return;
410 }
411
412 *dash = '-';
413
414 if (first > last)
415 {
416 warning (0, "%s-%s is an empty range", str, dash + 1);
417 return;
418 }
419
420 for (i = first; i <= last; ++i)
421 fixed_regs[i] = call_used_regs[i] = 1;
422
423 if (!comma)
424 break;
425
426 *comma = ',';
427 str = comma + 1;
428 }
429
430 /* Check if all floating point registers have been fixed. */
431 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
432 if (!fixed_regs[i])
433 break;
434
435 if (i > FP_REG_LAST)
436 target_flags |= MASK_DISABLE_FPREGS;
437 }
438
439 /* Implement TARGET_HANDLE_OPTION. */
440
441 static bool
442 pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
443 {
444 switch (code)
445 {
446 case OPT_mnosnake:
447 case OPT_mpa_risc_1_0:
448 case OPT_march_1_0:
449 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
450 return true;
451
452 case OPT_msnake:
453 case OPT_mpa_risc_1_1:
454 case OPT_march_1_1:
455 target_flags &= ~MASK_PA_20;
456 target_flags |= MASK_PA_11;
457 return true;
458
459 case OPT_mpa_risc_2_0:
460 case OPT_march_2_0:
461 target_flags |= MASK_PA_11 | MASK_PA_20;
462 return true;
463
464 case OPT_mschedule_:
465 if (strcmp (arg, "8000") == 0)
466 pa_cpu = PROCESSOR_8000;
467 else if (strcmp (arg, "7100") == 0)
468 pa_cpu = PROCESSOR_7100;
469 else if (strcmp (arg, "700") == 0)
470 pa_cpu = PROCESSOR_700;
471 else if (strcmp (arg, "7100LC") == 0)
472 pa_cpu = PROCESSOR_7100LC;
473 else if (strcmp (arg, "7200") == 0)
474 pa_cpu = PROCESSOR_7200;
475 else if (strcmp (arg, "7300") == 0)
476 pa_cpu = PROCESSOR_7300;
477 else
478 return false;
479 return true;
480
481 case OPT_mfixed_range_:
482 fix_range (arg);
483 return true;
484
485 #if TARGET_HPUX
486 case OPT_munix_93:
487 flag_pa_unix = 1993;
488 return true;
489 #endif
490
491 #if TARGET_HPUX_10_10
492 case OPT_munix_95:
493 flag_pa_unix = 1995;
494 return true;
495 #endif
496
497 #if TARGET_HPUX_11_11
498 case OPT_munix_98:
499 flag_pa_unix = 1998;
500 return true;
501 #endif
502
503 default:
504 return true;
505 }
506 }
507
508 /* Implement the TARGET_OPTION_OVERRIDE hook. */
509
510 static void
511 pa_option_override (void)
512 {
513 /* Unconditional branches in the delay slot are not compatible with dwarf2
514 call frame information. There is no benefit in using this optimization
515 on PA8000 and later processors. */
516 if (pa_cpu >= PROCESSOR_8000
517 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
518 || flag_unwind_tables)
519 target_flags &= ~MASK_JUMP_IN_DELAY;
520
521 if (flag_pic && TARGET_PORTABLE_RUNTIME)
522 {
523 warning (0, "PIC code generation is not supported in the portable runtime model");
524 }
525
526 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
527 {
528 warning (0, "PIC code generation is not compatible with fast indirect calls");
529 }
530
531 if (! TARGET_GAS && write_symbols != NO_DEBUG)
532 {
533 warning (0, "-g is only supported when using GAS on this processor,");
534 warning (0, "-g option disabled");
535 write_symbols = NO_DEBUG;
536 }
537
538 /* We only support the "big PIC" model now. And we always generate PIC
539 code when in 64bit mode. */
540 if (flag_pic == 1 || TARGET_64BIT)
541 flag_pic = 2;
542
543 /* Disable -freorder-blocks-and-partition as we don't support hot and
544 cold partitioning. */
545 if (flag_reorder_blocks_and_partition)
546 {
547 inform (input_location,
548 "-freorder-blocks-and-partition does not work "
549 "on this architecture");
550 flag_reorder_blocks_and_partition = 0;
551 flag_reorder_blocks = 1;
552 }
553
554 /* We can't guarantee that .dword is available for 32-bit targets. */
555 if (UNITS_PER_WORD == 4)
556 targetm.asm_out.aligned_op.di = NULL;
557
558 /* The unaligned ops are only available when using GAS. */
559 if (!TARGET_GAS)
560 {
561 targetm.asm_out.unaligned_op.hi = NULL;
562 targetm.asm_out.unaligned_op.si = NULL;
563 targetm.asm_out.unaligned_op.di = NULL;
564 }
565
566 init_machine_status = pa_init_machine_status;
567 }
568
569 static void
570 pa_init_builtins (void)
571 {
572 #ifdef DONT_HAVE_FPUTC_UNLOCKED
573 built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
574 built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
575 implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
576 = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
577 #endif
578 #if TARGET_HPUX_11
579 if (built_in_decls [BUILT_IN_FINITE])
580 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
581 if (built_in_decls [BUILT_IN_FINITEF])
582 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
583 #endif
584 }
585
586 /* Function to init struct machine_function.
587 This will be called, via a pointer variable,
588 from push_function_context. */
589
590 static struct machine_function *
591 pa_init_machine_status (void)
592 {
593 return ggc_alloc_cleared_machine_function ();
594 }
595
596 /* If FROM is a probable pointer register, mark TO as a probable
597 pointer register with the same pointer alignment as FROM. */
598
599 static void
600 copy_reg_pointer (rtx to, rtx from)
601 {
602 if (REG_POINTER (from))
603 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
604 }
605
606 /* Return 1 if X contains a symbolic expression. We know these
607 expressions will have one of a few well defined forms, so
608 we need only check those forms. */
609 int
610 symbolic_expression_p (rtx x)
611 {
612
613 /* Strip off any HIGH. */
614 if (GET_CODE (x) == HIGH)
615 x = XEXP (x, 0);
616
617 return (symbolic_operand (x, VOIDmode));
618 }
619
620 /* Accept any constant that can be moved in one instruction into a
621 general register. */
622 int
623 cint_ok_for_move (HOST_WIDE_INT ival)
624 {
625 /* OK if ldo, ldil, or zdepi, can be used. */
626 return (VAL_14_BITS_P (ival)
627 || ldil_cint_p (ival)
628 || zdepi_cint_p (ival));
629 }
630 \f
631 /* Return truth value of whether OP can be used as an operand in a
632 adddi3 insn. */
633 int
634 adddi3_operand (rtx op, enum machine_mode mode)
635 {
636 return (register_operand (op, mode)
637 || (GET_CODE (op) == CONST_INT
638 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
639 }
640
641 /* True iff the operand OP can be used as the destination operand of
642 an integer store. This also implies the operand could be used as
643 the source operand of an integer load. Symbolic, lo_sum and indexed
644 memory operands are not allowed. We accept reloading pseudos and
645 other memory operands. */
646 int
647 integer_store_memory_operand (rtx op, enum machine_mode mode)
648 {
649 return ((reload_in_progress
650 && REG_P (op)
651 && REGNO (op) >= FIRST_PSEUDO_REGISTER
652 && reg_renumber [REGNO (op)] < 0)
653 || (GET_CODE (op) == MEM
654 && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
655 && !symbolic_memory_operand (op, VOIDmode)
656 && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
657 && !IS_INDEX_ADDR_P (XEXP (op, 0))));
658 }
659
660 /* True iff ldil can be used to load this CONST_INT. The least
661 significant 11 bits of the value must be zero and the value must
662 not change sign when extended from 32 to 64 bits. */
663 int
664 ldil_cint_p (HOST_WIDE_INT ival)
665 {
666 HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);
667
668 return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
669 }
670
671 /* True iff zdepi can be used to generate this CONST_INT.
672 zdepi first sign extends a 5-bit signed number to a given field
673 length, then places this field anywhere in a zero. */
674 int
675 zdepi_cint_p (unsigned HOST_WIDE_INT x)
676 {
677 unsigned HOST_WIDE_INT lsb_mask, t;
678
679 /* This might not be obvious, but it's at least fast.
680 This function is critical; we don't have the time loops would take. */
681 lsb_mask = x & -x;
682 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
683 /* Return true iff t is a power of two. */
684 return ((t & (t - 1)) == 0);
685 }
686
687 /* True iff depi or extru can be used to compute (reg & mask).
688 Accept bit pattern like these:
689 0....01....1
690 1....10....0
691 1..10..01..1 */
692 int
693 and_mask_p (unsigned HOST_WIDE_INT mask)
694 {
695 mask = ~mask;
696 mask += mask & -mask;
697 return (mask & (mask - 1)) == 0;
698 }
699
700 /* True iff depi can be used to compute (reg | MASK). */
701 int
702 ior_mask_p (unsigned HOST_WIDE_INT mask)
703 {
704 mask += mask & -mask;
705 return (mask & (mask - 1)) == 0;
706 }
707 \f
708 /* Legitimize PIC addresses. If the address is already
709 position-independent, we return ORIG. Newly generated
710 position-independent addresses go to REG. If we need more
711 than one register, we lose. */
712
713 rtx
714 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
715 {
716 rtx pic_ref = orig;
717
718 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
719
720 /* Labels need special handling. */
721 if (pic_label_operand (orig, mode))
722 {
723 rtx insn;
724
725 /* We do not want to go through the movXX expanders here since that
726 would create recursion.
727
728 Nor do we really want to call a generator for a named pattern
729 since that requires multiple patterns if we want to support
730 multiple word sizes.
731
732 So instead we just emit the raw set, which avoids the movXX
733 expanders completely. */
734 mark_reg_pointer (reg, BITS_PER_UNIT);
735 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
736
737 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
738 add_reg_note (insn, REG_EQUAL, orig);
739
740 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
741 and update LABEL_NUSES because this is not done automatically. */
742 if (reload_in_progress || reload_completed)
743 {
744 /* Extract LABEL_REF. */
745 if (GET_CODE (orig) == CONST)
746 orig = XEXP (XEXP (orig, 0), 0);
747 /* Extract CODE_LABEL. */
748 orig = XEXP (orig, 0);
749 add_reg_note (insn, REG_LABEL_OPERAND, orig);
750 LABEL_NUSES (orig)++;
751 }
752 crtl->uses_pic_offset_table = 1;
753 return reg;
754 }
755 if (GET_CODE (orig) == SYMBOL_REF)
756 {
757 rtx insn, tmp_reg;
758
759 gcc_assert (reg);
760
761 /* Before reload, allocate a temporary register for the intermediate
762 result. This allows the sequence to be deleted when the final
763 result is unused and the insns are trivially dead. */
764 tmp_reg = ((reload_in_progress || reload_completed)
765 ? reg : gen_reg_rtx (Pmode));
766
767 if (function_label_operand (orig, mode))
768 {
769 /* Force function label into memory in word mode. */
770 orig = XEXP (force_const_mem (word_mode, orig), 0);
771 /* Load plabel address from DLT. */
772 emit_move_insn (tmp_reg,
773 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
774 gen_rtx_HIGH (word_mode, orig)));
775 pic_ref
776 = gen_const_mem (Pmode,
777 gen_rtx_LO_SUM (Pmode, tmp_reg,
778 gen_rtx_UNSPEC (Pmode,
779 gen_rtvec (1, orig),
780 UNSPEC_DLTIND14R)));
781 emit_move_insn (reg, pic_ref);
782 /* Now load address of function descriptor. */
783 pic_ref = gen_rtx_MEM (Pmode, reg);
784 }
785 else
786 {
787 /* Load symbol reference from DLT. */
788 emit_move_insn (tmp_reg,
789 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
790 gen_rtx_HIGH (word_mode, orig)));
791 pic_ref
792 = gen_const_mem (Pmode,
793 gen_rtx_LO_SUM (Pmode, tmp_reg,
794 gen_rtx_UNSPEC (Pmode,
795 gen_rtvec (1, orig),
796 UNSPEC_DLTIND14R)));
797 }
798
799 crtl->uses_pic_offset_table = 1;
800 mark_reg_pointer (reg, BITS_PER_UNIT);
801 insn = emit_move_insn (reg, pic_ref);
802
803 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
804 set_unique_reg_note (insn, REG_EQUAL, orig);
805
806 return reg;
807 }
808 else if (GET_CODE (orig) == CONST)
809 {
810 rtx base;
811
812 if (GET_CODE (XEXP (orig, 0)) == PLUS
813 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
814 return orig;
815
816 gcc_assert (reg);
817 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
818
819 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
820 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
821 base == reg ? 0 : reg);
822
823 if (GET_CODE (orig) == CONST_INT)
824 {
825 if (INT_14_BITS (orig))
826 return plus_constant (base, INTVAL (orig));
827 orig = force_reg (Pmode, orig);
828 }
829 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
830 /* Likewise, should we set special REG_NOTEs here? */
831 }
832
833 return pic_ref;
834 }
835
836 static GTY(()) rtx gen_tls_tga;
837
838 static rtx
839 gen_tls_get_addr (void)
840 {
841 if (!gen_tls_tga)
842 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
843 return gen_tls_tga;
844 }
845
846 static rtx
847 hppa_tls_call (rtx arg)
848 {
849 rtx ret;
850
851 ret = gen_reg_rtx (Pmode);
852 emit_library_call_value (gen_tls_get_addr (), ret,
853 LCT_CONST, Pmode, 1, arg, Pmode);
854
855 return ret;
856 }
857
858 static rtx
859 legitimize_tls_address (rtx addr)
860 {
861 rtx ret, insn, tmp, t1, t2, tp;
862 enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);
863
864 switch (model)
865 {
866 case TLS_MODEL_GLOBAL_DYNAMIC:
867 tmp = gen_reg_rtx (Pmode);
868 if (flag_pic)
869 emit_insn (gen_tgd_load_pic (tmp, addr));
870 else
871 emit_insn (gen_tgd_load (tmp, addr));
872 ret = hppa_tls_call (tmp);
873 break;
874
875 case TLS_MODEL_LOCAL_DYNAMIC:
876 ret = gen_reg_rtx (Pmode);
877 tmp = gen_reg_rtx (Pmode);
878 start_sequence ();
879 if (flag_pic)
880 emit_insn (gen_tld_load_pic (tmp, addr));
881 else
882 emit_insn (gen_tld_load (tmp, addr));
883 t1 = hppa_tls_call (tmp);
884 insn = get_insns ();
885 end_sequence ();
886 t2 = gen_reg_rtx (Pmode);
887 emit_libcall_block (insn, t2, t1,
888 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
889 UNSPEC_TLSLDBASE));
890 emit_insn (gen_tld_offset_load (ret, addr, t2));
891 break;
892
893 case TLS_MODEL_INITIAL_EXEC:
894 tp = gen_reg_rtx (Pmode);
895 tmp = gen_reg_rtx (Pmode);
896 ret = gen_reg_rtx (Pmode);
897 emit_insn (gen_tp_load (tp));
898 if (flag_pic)
899 emit_insn (gen_tie_load_pic (tmp, addr));
900 else
901 emit_insn (gen_tie_load (tmp, addr));
902 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
903 break;
904
905 case TLS_MODEL_LOCAL_EXEC:
906 tp = gen_reg_rtx (Pmode);
907 ret = gen_reg_rtx (Pmode);
908 emit_insn (gen_tp_load (tp));
909 emit_insn (gen_tle_load (ret, addr, tp));
910 break;
911
912 default:
913 gcc_unreachable ();
914 }
915
916 return ret;
917 }
918
919 /* Try machine-dependent ways of modifying an illegitimate address
920 to be legitimate. If we find one, return the new, valid address.
921 This macro is used in only one place: `memory_address' in explow.c.
922
923 OLDX is the address as it was before break_out_memory_refs was called.
924 In some cases it is useful to look at this to decide what needs to be done.
925
926 It is always safe for this macro to do nothing. It exists to recognize
927 opportunities to optimize the output.
928
929 For the PA, transform:
930
931 memory(X + <large int>)
932
933 into:
934
935 if (<large int> & mask) >= 16
936 Y = (<large int> & ~mask) + mask + 1 Round up.
937 else
938 Y = (<large int> & ~mask) Round down.
939 Z = X + Y
940 memory (Z + (<large int> - Y));
941
942 This is for CSE to find several similar references, and only use one Z.
943
944 X can either be a SYMBOL_REF or REG, but because combine cannot
945 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
946 D will not fit in 14 bits.
947
948 MODE_FLOAT references allow displacements which fit in 5 bits, so use
949 0x1f as the mask.
950
951 MODE_INT references allow displacements which fit in 14 bits, so use
952 0x3fff as the mask.
953
954 This relies on the fact that most mode MODE_FLOAT references will use FP
955 registers and most mode MODE_INT references will use integer registers.
956 (In the rare case of an FP register used in an integer MODE, we depend
957 on secondary reloads to clean things up.)
958
959
960 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
961 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
962 addressing modes to be used).
963
964 Put X and Z into registers. Then put the entire expression into
965 a register. */
966
967 rtx
968 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
969 enum machine_mode mode)
970 {
971 rtx orig = x;
972
973 /* We need to canonicalize the order of operands in unscaled indexed
974 addresses since the code that checks if an address is valid doesn't
975 always try both orders. */
976 if (!TARGET_NO_SPACE_REGS
977 && GET_CODE (x) == PLUS
978 && GET_MODE (x) == Pmode
979 && REG_P (XEXP (x, 0))
980 && REG_P (XEXP (x, 1))
981 && REG_POINTER (XEXP (x, 0))
982 && !REG_POINTER (XEXP (x, 1)))
983 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
984
985 if (PA_SYMBOL_REF_TLS_P (x))
986 return legitimize_tls_address (x);
987 else if (flag_pic)
988 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
989
990 /* Strip off CONST. */
991 if (GET_CODE (x) == CONST)
992 x = XEXP (x, 0);
993
994 /* Special case. Get the SYMBOL_REF into a register and use indexing.
995 That should always be safe. */
996 if (GET_CODE (x) == PLUS
997 && GET_CODE (XEXP (x, 0)) == REG
998 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
999 {
1000 rtx reg = force_reg (Pmode, XEXP (x, 1));
1001 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
1002 }
1003
1004 /* Note we must reject symbols which represent function addresses
1005 since the assembler/linker can't handle arithmetic on plabels. */
1006 if (GET_CODE (x) == PLUS
1007 && GET_CODE (XEXP (x, 1)) == CONST_INT
1008 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1009 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
1010 || GET_CODE (XEXP (x, 0)) == REG))
1011 {
1012 rtx int_part, ptr_reg;
1013 int newoffset;
1014 int offset = INTVAL (XEXP (x, 1));
1015 int mask;
1016
1017 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1018 ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);
1019
1020 /* Choose which way to round the offset. Round up if we
1021 are >= halfway to the next boundary. */
1022 if ((offset & mask) >= ((mask + 1) / 2))
1023 newoffset = (offset & ~ mask) + mask + 1;
1024 else
1025 newoffset = (offset & ~ mask);
1026
1027 /* If the newoffset will not fit in 14 bits (ldo), then
1028 handling this would take 4 or 5 instructions (2 to load
1029 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1030 add the new offset and the SYMBOL_REF.) Combine can
1031 not handle 4->2 or 5->2 combinations, so do not create
1032 them. */
1033 if (! VAL_14_BITS_P (newoffset)
1034 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1035 {
1036 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
1037 rtx tmp_reg
1038 = force_reg (Pmode,
1039 gen_rtx_HIGH (Pmode, const_part));
1040 ptr_reg
1041 = force_reg (Pmode,
1042 gen_rtx_LO_SUM (Pmode,
1043 tmp_reg, const_part));
1044 }
1045 else
1046 {
1047 if (! VAL_14_BITS_P (newoffset))
1048 int_part = force_reg (Pmode, GEN_INT (newoffset));
1049 else
1050 int_part = GEN_INT (newoffset);
1051
1052 ptr_reg = force_reg (Pmode,
1053 gen_rtx_PLUS (Pmode,
1054 force_reg (Pmode, XEXP (x, 0)),
1055 int_part));
1056 }
1057 return plus_constant (ptr_reg, offset - newoffset);
1058 }
1059
1060 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1061
1062 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1063 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1064 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1065 && (OBJECT_P (XEXP (x, 1))
1066 || GET_CODE (XEXP (x, 1)) == SUBREG)
1067 && GET_CODE (XEXP (x, 1)) != CONST)
1068 {
1069 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1070 rtx reg1, reg2;
1071
1072 reg1 = XEXP (x, 1);
1073 if (GET_CODE (reg1) != REG)
1074 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1075
1076 reg2 = XEXP (XEXP (x, 0), 0);
1077 if (GET_CODE (reg2) != REG)
1078 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1079
1080 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1081 gen_rtx_MULT (Pmode,
1082 reg2,
1083 GEN_INT (val)),
1084 reg1));
1085 }
1086
1087 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1088
1089 Only do so for floating point modes since this is more speculative
1090 and we lose if it's an integer store. */
1091 if (GET_CODE (x) == PLUS
1092 && GET_CODE (XEXP (x, 0)) == PLUS
1093 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1094 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1095 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1096 && (mode == SFmode || mode == DFmode))
1097 {
1098
1099 /* First, try and figure out what to use as a base register. */
1100 rtx reg1, reg2, base, idx;
1101
1102 reg1 = XEXP (XEXP (x, 0), 1);
1103 reg2 = XEXP (x, 1);
1104 base = NULL_RTX;
1105 idx = NULL_RTX;
1106
1107 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1108 then emit_move_sequence will turn on REG_POINTER so we'll know
1109 it's a base register below. */
1110 if (GET_CODE (reg1) != REG)
1111 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1112
1113 if (GET_CODE (reg2) != REG)
1114 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1115
1116 /* Figure out what the base and index are. */
1117
1118 if (GET_CODE (reg1) == REG
1119 && REG_POINTER (reg1))
1120 {
1121 base = reg1;
1122 idx = gen_rtx_PLUS (Pmode,
1123 gen_rtx_MULT (Pmode,
1124 XEXP (XEXP (XEXP (x, 0), 0), 0),
1125 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1126 XEXP (x, 1));
1127 }
1128 else if (GET_CODE (reg2) == REG
1129 && REG_POINTER (reg2))
1130 {
1131 base = reg2;
1132 idx = XEXP (x, 0);
1133 }
1134
1135 if (base == 0)
1136 return orig;
1137
1138 /* If the index adds a large constant, try to scale the
1139 constant so that it can be loaded with only one insn. */
1140 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1141 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1142 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1143 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1144 {
1145 /* Divide the CONST_INT by the scale factor, then add it to A. */
1146 int val = INTVAL (XEXP (idx, 1));
1147
1148 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1149 reg1 = XEXP (XEXP (idx, 0), 0);
1150 if (GET_CODE (reg1) != REG)
1151 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1152
1153 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1154
1155 /* We can now generate a simple scaled indexed address. */
1156 return
1157 force_reg
1158 (Pmode, gen_rtx_PLUS (Pmode,
1159 gen_rtx_MULT (Pmode, reg1,
1160 XEXP (XEXP (idx, 0), 1)),
1161 base));
1162 }
1163
1164 /* If B + C is still a valid base register, then add them. */
1165 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1166 && INTVAL (XEXP (idx, 1)) <= 4096
1167 && INTVAL (XEXP (idx, 1)) >= -4096)
1168 {
1169 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1170 rtx reg1, reg2;
1171
1172 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1173
1174 reg2 = XEXP (XEXP (idx, 0), 0);
1175 if (GET_CODE (reg2) != CONST_INT)
1176 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1177
1178 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1179 gen_rtx_MULT (Pmode,
1180 reg2,
1181 GEN_INT (val)),
1182 reg1));
1183 }
1184
1185 /* Get the index into a register, then add the base + index and
1186 return a register holding the result. */
1187
1188 /* First get A into a register. */
1189 reg1 = XEXP (XEXP (idx, 0), 0);
1190 if (GET_CODE (reg1) != REG)
1191 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1192
1193 /* And get B into a register. */
1194 reg2 = XEXP (idx, 1);
1195 if (GET_CODE (reg2) != REG)
1196 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1197
1198 reg1 = force_reg (Pmode,
1199 gen_rtx_PLUS (Pmode,
1200 gen_rtx_MULT (Pmode, reg1,
1201 XEXP (XEXP (idx, 0), 1)),
1202 reg2));
1203
1204 /* Add the result to our base register and return. */
1205 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1206
1207 }
1208
1209 /* Uh-oh. We might have an address for x[n-100000]. This needs
1210 special handling to avoid creating an indexed memory address
1211 with x-100000 as the base.
1212
1213 If the constant part is small enough, then it's still safe because
1214 there is a guard page at the beginning and end of the data segment.
1215
1216 Scaled references are common enough that we want to try and rearrange the
1217 terms so that we can use indexing for these addresses too. Only
1218 do the optimization for floatint point modes. */
1219
1220 if (GET_CODE (x) == PLUS
1221 && symbolic_expression_p (XEXP (x, 1)))
1222 {
1223 /* Ugly. We modify things here so that the address offset specified
1224 by the index expression is computed first, then added to x to form
1225 the entire address. */
1226
1227 rtx regx1, regx2, regy1, regy2, y;
1228
1229 /* Strip off any CONST. */
1230 y = XEXP (x, 1);
1231 if (GET_CODE (y) == CONST)
1232 y = XEXP (y, 0);
1233
1234 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1235 {
1236 /* See if this looks like
1237 (plus (mult (reg) (shadd_const))
1238 (const (plus (symbol_ref) (const_int))))
1239
1240 Where const_int is small. In that case the const
1241 expression is a valid pointer for indexing.
1242
1243 If const_int is big, but can be divided evenly by shadd_const
1244 and added to (reg). This allows more scaled indexed addresses. */
1245 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1246 && GET_CODE (XEXP (x, 0)) == MULT
1247 && GET_CODE (XEXP (y, 1)) == CONST_INT
1248 && INTVAL (XEXP (y, 1)) >= -4096
1249 && INTVAL (XEXP (y, 1)) <= 4095
1250 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1251 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1252 {
1253 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1254 rtx reg1, reg2;
1255
1256 reg1 = XEXP (x, 1);
1257 if (GET_CODE (reg1) != REG)
1258 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1259
1260 reg2 = XEXP (XEXP (x, 0), 0);
1261 if (GET_CODE (reg2) != REG)
1262 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1263
1264 return force_reg (Pmode,
1265 gen_rtx_PLUS (Pmode,
1266 gen_rtx_MULT (Pmode,
1267 reg2,
1268 GEN_INT (val)),
1269 reg1));
1270 }
1271 else if ((mode == DFmode || mode == SFmode)
1272 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1273 && GET_CODE (XEXP (x, 0)) == MULT
1274 && GET_CODE (XEXP (y, 1)) == CONST_INT
1275 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1276 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1277 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1278 {
1279 regx1
1280 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1281 / INTVAL (XEXP (XEXP (x, 0), 1))));
1282 regx2 = XEXP (XEXP (x, 0), 0);
1283 if (GET_CODE (regx2) != REG)
1284 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1285 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1286 regx2, regx1));
1287 return
1288 force_reg (Pmode,
1289 gen_rtx_PLUS (Pmode,
1290 gen_rtx_MULT (Pmode, regx2,
1291 XEXP (XEXP (x, 0), 1)),
1292 force_reg (Pmode, XEXP (y, 0))));
1293 }
1294 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1295 && INTVAL (XEXP (y, 1)) >= -4096
1296 && INTVAL (XEXP (y, 1)) <= 4095)
1297 {
1298 /* This is safe because of the guard page at the
1299 beginning and end of the data space. Just
1300 return the original address. */
1301 return orig;
1302 }
1303 else
1304 {
1305 /* Doesn't look like one we can optimize. */
1306 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1307 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1308 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1309 regx1 = force_reg (Pmode,
1310 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1311 regx1, regy2));
1312 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1313 }
1314 }
1315 }
1316
1317 return orig;
1318 }
1319
1320 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1321
1322 Compute extra cost of moving data between one register class
1323 and another.
1324
1325 Make moves from SAR so expensive they should never happen. We used to
1326 have 0xffff here, but that generates overflow in rare cases.
1327
1328 Copies involving a FP register and a non-FP register are relatively
1329 expensive because they must go through memory.
1330
1331 Other copies are reasonably cheap. */
1332
1333 static int
1334 hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
1335 reg_class_t from, reg_class_t to)
1336 {
1337 if (from == SHIFT_REGS)
1338 return 0x100;
1339 else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
1340 || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
1341 return 16;
1342 else
1343 return 2;
1344 }
1345
1346 /* For the HPPA, REG and REG+CONST is cost 0
1347 and addresses involving symbolic constants are cost 2.
1348
1349 PIC addresses are very expensive.
1350
1351 It is no coincidence that this has the same structure
1352 as GO_IF_LEGITIMATE_ADDRESS. */
1353
1354 static int
1355 hppa_address_cost (rtx X,
1356 bool speed ATTRIBUTE_UNUSED)
1357 {
1358 switch (GET_CODE (X))
1359 {
1360 case REG:
1361 case PLUS:
1362 case LO_SUM:
1363 return 1;
1364 case HIGH:
1365 return 2;
1366 default:
1367 return 4;
1368 }
1369 }
1370
1371 /* Compute a (partial) cost for rtx X. Return true if the complete
1372 cost has been computed, and false if subexpressions should be
1373 scanned. In either case, *TOTAL contains the cost result. */
1374
1375 static bool
1376 hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
1377 bool speed ATTRIBUTE_UNUSED)
1378 {
1379 switch (code)
1380 {
1381 case CONST_INT:
1382 if (INTVAL (x) == 0)
1383 *total = 0;
1384 else if (INT_14_BITS (x))
1385 *total = 1;
1386 else
1387 *total = 2;
1388 return true;
1389
1390 case HIGH:
1391 *total = 2;
1392 return true;
1393
1394 case CONST:
1395 case LABEL_REF:
1396 case SYMBOL_REF:
1397 *total = 4;
1398 return true;
1399
1400 case CONST_DOUBLE:
1401 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1402 && outer_code != SET)
1403 *total = 0;
1404 else
1405 *total = 8;
1406 return true;
1407
1408 case MULT:
1409 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1410 *total = COSTS_N_INSNS (3);
1411 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1412 *total = COSTS_N_INSNS (8);
1413 else
1414 *total = COSTS_N_INSNS (20);
1415 return true;
1416
1417 case DIV:
1418 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1419 {
1420 *total = COSTS_N_INSNS (14);
1421 return true;
1422 }
1423 /* FALLTHRU */
1424
1425 case UDIV:
1426 case MOD:
1427 case UMOD:
1428 *total = COSTS_N_INSNS (60);
1429 return true;
1430
1431 case PLUS: /* this includes shNadd insns */
1432 case MINUS:
1433 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1434 *total = COSTS_N_INSNS (3);
1435 else
1436 *total = COSTS_N_INSNS (1);
1437 return true;
1438
1439 case ASHIFT:
1440 case ASHIFTRT:
1441 case LSHIFTRT:
1442 *total = COSTS_N_INSNS (1);
1443 return true;
1444
1445 default:
1446 return false;
1447 }
1448 }
1449
1450 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1451 new rtx with the correct mode. */
1452 static inline rtx
1453 force_mode (enum machine_mode mode, rtx orig)
1454 {
1455 if (mode == GET_MODE (orig))
1456 return orig;
1457
1458 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1459
1460 return gen_rtx_REG (mode, REGNO (orig));
1461 }
1462
1463 /* Return 1 if *X is a thread-local symbol. */
1464
1465 static int
1466 pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
1467 {
1468 return PA_SYMBOL_REF_TLS_P (*x);
1469 }
1470
1471 /* Return 1 if X contains a thread-local symbol. */
1472
1473 bool
1474 pa_tls_referenced_p (rtx x)
1475 {
1476 if (!TARGET_HAVE_TLS)
1477 return false;
1478
1479 return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
1480 }
1481
1482 /* Emit insns to move operands[1] into operands[0].
1483
1484 Return 1 if we have written out everything that needs to be done to
1485 do the move. Otherwise, return 0 and the caller will emit the move
1486 normally.
1487
1488 Note SCRATCH_REG may not be in the proper mode depending on how it
1489 will be used. This routine is responsible for creating a new copy
1490 of SCRATCH_REG in the proper mode. */
1491
1492 int
1493 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1494 {
1495 register rtx operand0 = operands[0];
1496 register rtx operand1 = operands[1];
1497 register rtx tem;
1498
1499 /* We can only handle indexed addresses in the destination operand
1500 of floating point stores. Thus, we need to break out indexed
1501 addresses from the destination operand. */
1502 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1503 {
1504 gcc_assert (can_create_pseudo_p ());
1505
1506 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1507 operand0 = replace_equiv_address (operand0, tem);
1508 }
1509
1510 /* On targets with non-equivalent space registers, break out unscaled
1511 indexed addresses from the source operand before the final CSE.
1512 We have to do this because the REG_POINTER flag is not correctly
1513 carried through various optimization passes and CSE may substitute
1514 a pseudo without the pointer set for one with the pointer set. As
1515 a result, we loose various opportunities to create insns with
1516 unscaled indexed addresses. */
1517 if (!TARGET_NO_SPACE_REGS
1518 && !cse_not_expected
1519 && GET_CODE (operand1) == MEM
1520 && GET_CODE (XEXP (operand1, 0)) == PLUS
1521 && REG_P (XEXP (XEXP (operand1, 0), 0))
1522 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1523 operand1
1524 = replace_equiv_address (operand1,
1525 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1526
1527 if (scratch_reg
1528 && reload_in_progress && GET_CODE (operand0) == REG
1529 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1530 operand0 = reg_equiv_mem[REGNO (operand0)];
1531 else if (scratch_reg
1532 && reload_in_progress && GET_CODE (operand0) == SUBREG
1533 && GET_CODE (SUBREG_REG (operand0)) == REG
1534 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1535 {
1536 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1537 the code which tracks sets/uses for delete_output_reload. */
1538 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1539 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1540 SUBREG_BYTE (operand0));
1541 operand0 = alter_subreg (&temp);
1542 }
1543
1544 if (scratch_reg
1545 && reload_in_progress && GET_CODE (operand1) == REG
1546 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1547 operand1 = reg_equiv_mem[REGNO (operand1)];
1548 else if (scratch_reg
1549 && reload_in_progress && GET_CODE (operand1) == SUBREG
1550 && GET_CODE (SUBREG_REG (operand1)) == REG
1551 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1552 {
1553 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1554 the code which tracks sets/uses for delete_output_reload. */
1555 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1556 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1557 SUBREG_BYTE (operand1));
1558 operand1 = alter_subreg (&temp);
1559 }
1560
1561 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1562 && ((tem = find_replacement (&XEXP (operand0, 0)))
1563 != XEXP (operand0, 0)))
1564 operand0 = replace_equiv_address (operand0, tem);
1565
1566 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1567 && ((tem = find_replacement (&XEXP (operand1, 0)))
1568 != XEXP (operand1, 0)))
1569 operand1 = replace_equiv_address (operand1, tem);
1570
1571 /* Handle secondary reloads for loads/stores of FP registers from
1572 REG+D addresses where D does not fit in 5 or 14 bits, including
1573 (subreg (mem (addr))) cases. */
1574 if (scratch_reg
1575 && fp_reg_operand (operand0, mode)
1576 && ((GET_CODE (operand1) == MEM
1577 && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1578 XEXP (operand1, 0)))
1579 || ((GET_CODE (operand1) == SUBREG
1580 && GET_CODE (XEXP (operand1, 0)) == MEM
1581 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1582 ? SFmode : DFmode),
1583 XEXP (XEXP (operand1, 0), 0))))))
1584 {
1585 if (GET_CODE (operand1) == SUBREG)
1586 operand1 = XEXP (operand1, 0);
1587
1588 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1589 it in WORD_MODE regardless of what mode it was originally given
1590 to us. */
1591 scratch_reg = force_mode (word_mode, scratch_reg);
1592
1593 /* D might not fit in 14 bits either; for such cases load D into
1594 scratch reg. */
1595 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1596 {
1597 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1598 emit_move_insn (scratch_reg,
1599 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1600 Pmode,
1601 XEXP (XEXP (operand1, 0), 0),
1602 scratch_reg));
1603 }
1604 else
1605 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1606 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1607 replace_equiv_address (operand1, scratch_reg)));
1608 return 1;
1609 }
1610 else if (scratch_reg
1611 && fp_reg_operand (operand1, mode)
1612 && ((GET_CODE (operand0) == MEM
1613 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1614 ? SFmode : DFmode),
1615 XEXP (operand0, 0)))
1616 || ((GET_CODE (operand0) == SUBREG)
1617 && GET_CODE (XEXP (operand0, 0)) == MEM
1618 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1619 ? SFmode : DFmode),
1620 XEXP (XEXP (operand0, 0), 0)))))
1621 {
1622 if (GET_CODE (operand0) == SUBREG)
1623 operand0 = XEXP (operand0, 0);
1624
1625 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1626 it in WORD_MODE regardless of what mode it was originally given
1627 to us. */
1628 scratch_reg = force_mode (word_mode, scratch_reg);
1629
1630 /* D might not fit in 14 bits either; for such cases load D into
1631 scratch reg. */
1632 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1633 {
1634 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1635 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1636 0)),
1637 Pmode,
1638 XEXP (XEXP (operand0, 0),
1639 0),
1640 scratch_reg));
1641 }
1642 else
1643 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1644 emit_insn (gen_rtx_SET (VOIDmode,
1645 replace_equiv_address (operand0, scratch_reg),
1646 operand1));
1647 return 1;
1648 }
1649 /* Handle secondary reloads for loads of FP registers from constant
1650 expressions by forcing the constant into memory.
1651
1652 Use scratch_reg to hold the address of the memory location.
1653
1654 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1655 NO_REGS when presented with a const_int and a register class
1656 containing only FP registers. Doing so unfortunately creates
1657 more problems than it solves. Fix this for 2.5. */
1658 else if (scratch_reg
1659 && CONSTANT_P (operand1)
1660 && fp_reg_operand (operand0, mode))
1661 {
1662 rtx const_mem, xoperands[2];
1663
1664 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1665 it in WORD_MODE regardless of what mode it was originally given
1666 to us. */
1667 scratch_reg = force_mode (word_mode, scratch_reg);
1668
1669 /* Force the constant into memory and put the address of the
1670 memory location into scratch_reg. */
1671 const_mem = force_const_mem (mode, operand1);
1672 xoperands[0] = scratch_reg;
1673 xoperands[1] = XEXP (const_mem, 0);
1674 emit_move_sequence (xoperands, Pmode, 0);
1675
1676 /* Now load the destination register. */
1677 emit_insn (gen_rtx_SET (mode, operand0,
1678 replace_equiv_address (const_mem, scratch_reg)));
1679 return 1;
1680 }
1681 /* Handle secondary reloads for SAR. These occur when trying to load
1682 the SAR from memory, FP register, or with a constant. */
1683 else if (scratch_reg
1684 && GET_CODE (operand0) == REG
1685 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1686 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1687 && (GET_CODE (operand1) == MEM
1688 || GET_CODE (operand1) == CONST_INT
1689 || (GET_CODE (operand1) == REG
1690 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1691 {
1692 /* D might not fit in 14 bits either; for such cases load D into
1693 scratch reg. */
1694 if (GET_CODE (operand1) == MEM
1695 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1696 {
1697 /* We are reloading the address into the scratch register, so we
1698 want to make sure the scratch register is a full register. */
1699 scratch_reg = force_mode (word_mode, scratch_reg);
1700
1701 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1702 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1703 0)),
1704 Pmode,
1705 XEXP (XEXP (operand1, 0),
1706 0),
1707 scratch_reg));
1708
1709 /* Now we are going to load the scratch register from memory,
1710 we want to load it in the same width as the original MEM,
1711 which must be the same as the width of the ultimate destination,
1712 OPERAND0. */
1713 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1714
1715 emit_move_insn (scratch_reg,
1716 replace_equiv_address (operand1, scratch_reg));
1717 }
1718 else
1719 {
1720 /* We want to load the scratch register using the same mode as
1721 the ultimate destination. */
1722 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1723
1724 emit_move_insn (scratch_reg, operand1);
1725 }
1726
1727 /* And emit the insn to set the ultimate destination. We know that
1728 the scratch register has the same mode as the destination at this
1729 point. */
1730 emit_move_insn (operand0, scratch_reg);
1731 return 1;
1732 }
1733 /* Handle the most common case: storing into a register. */
1734 else if (register_operand (operand0, mode))
1735 {
1736 if (register_operand (operand1, mode)
1737 || (GET_CODE (operand1) == CONST_INT
1738 && cint_ok_for_move (INTVAL (operand1)))
1739 || (operand1 == CONST0_RTX (mode))
1740 || (GET_CODE (operand1) == HIGH
1741 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1742 /* Only `general_operands' can come here, so MEM is ok. */
1743 || GET_CODE (operand1) == MEM)
1744 {
1745 /* Various sets are created during RTL generation which don't
1746 have the REG_POINTER flag correctly set. After the CSE pass,
1747 instruction recognition can fail if we don't consistently
1748 set this flag when performing register copies. This should
1749 also improve the opportunities for creating insns that use
1750 unscaled indexing. */
1751 if (REG_P (operand0) && REG_P (operand1))
1752 {
1753 if (REG_POINTER (operand1)
1754 && !REG_POINTER (operand0)
1755 && !HARD_REGISTER_P (operand0))
1756 copy_reg_pointer (operand0, operand1);
1757 }
1758
1759 /* When MEMs are broken out, the REG_POINTER flag doesn't
1760 get set. In some cases, we can set the REG_POINTER flag
1761 from the declaration for the MEM. */
1762 if (REG_P (operand0)
1763 && GET_CODE (operand1) == MEM
1764 && !REG_POINTER (operand0))
1765 {
1766 tree decl = MEM_EXPR (operand1);
1767
1768 /* Set the register pointer flag and register alignment
1769 if the declaration for this memory reference is a
1770 pointer type. */
1771 if (decl)
1772 {
1773 tree type;
1774
1775 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1776 tree operand 1. */
1777 if (TREE_CODE (decl) == COMPONENT_REF)
1778 decl = TREE_OPERAND (decl, 1);
1779
1780 type = TREE_TYPE (decl);
1781 type = strip_array_types (type);
1782
1783 if (POINTER_TYPE_P (type))
1784 {
1785 int align;
1786
1787 type = TREE_TYPE (type);
1788 /* Using TYPE_ALIGN_OK is rather conservative as
1789 only the ada frontend actually sets it. */
1790 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1791 : BITS_PER_UNIT);
1792 mark_reg_pointer (operand0, align);
1793 }
1794 }
1795 }
1796
1797 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1798 return 1;
1799 }
1800 }
1801 else if (GET_CODE (operand0) == MEM)
1802 {
1803 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1804 && !(reload_in_progress || reload_completed))
1805 {
1806 rtx temp = gen_reg_rtx (DFmode);
1807
1808 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1809 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1810 return 1;
1811 }
1812 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1813 {
1814 /* Run this case quickly. */
1815 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1816 return 1;
1817 }
1818 if (! (reload_in_progress || reload_completed))
1819 {
1820 operands[0] = validize_mem (operand0);
1821 operands[1] = operand1 = force_reg (mode, operand1);
1822 }
1823 }
1824
1825 /* Simplify the source if we need to.
1826 Note we do have to handle function labels here, even though we do
1827 not consider them legitimate constants. Loop optimizations can
1828 call the emit_move_xxx with one as a source. */
1829 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1830 || function_label_operand (operand1, mode)
1831 || (GET_CODE (operand1) == HIGH
1832 && symbolic_operand (XEXP (operand1, 0), mode)))
1833 {
1834 int ishighonly = 0;
1835
1836 if (GET_CODE (operand1) == HIGH)
1837 {
1838 ishighonly = 1;
1839 operand1 = XEXP (operand1, 0);
1840 }
1841 if (symbolic_operand (operand1, mode))
1842 {
1843 /* Argh. The assembler and linker can't handle arithmetic
1844 involving plabels.
1845
1846 So we force the plabel into memory, load operand0 from
1847 the memory location, then add in the constant part. */
1848 if ((GET_CODE (operand1) == CONST
1849 && GET_CODE (XEXP (operand1, 0)) == PLUS
1850 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1851 || function_label_operand (operand1, mode))
1852 {
1853 rtx temp, const_part;
1854
1855 /* Figure out what (if any) scratch register to use. */
1856 if (reload_in_progress || reload_completed)
1857 {
1858 scratch_reg = scratch_reg ? scratch_reg : operand0;
1859 /* SCRATCH_REG will hold an address and maybe the actual
1860 data. We want it in WORD_MODE regardless of what mode it
1861 was originally given to us. */
1862 scratch_reg = force_mode (word_mode, scratch_reg);
1863 }
1864 else if (flag_pic)
1865 scratch_reg = gen_reg_rtx (Pmode);
1866
1867 if (GET_CODE (operand1) == CONST)
1868 {
1869 /* Save away the constant part of the expression. */
1870 const_part = XEXP (XEXP (operand1, 0), 1);
1871 gcc_assert (GET_CODE (const_part) == CONST_INT);
1872
1873 /* Force the function label into memory. */
1874 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1875 }
1876 else
1877 {
1878 /* No constant part. */
1879 const_part = NULL_RTX;
1880
1881 /* Force the function label into memory. */
1882 temp = force_const_mem (mode, operand1);
1883 }
1884
1885
1886 /* Get the address of the memory location. PIC-ify it if
1887 necessary. */
1888 temp = XEXP (temp, 0);
1889 if (flag_pic)
1890 temp = legitimize_pic_address (temp, mode, scratch_reg);
1891
1892 /* Put the address of the memory location into our destination
1893 register. */
1894 operands[1] = temp;
1895 emit_move_sequence (operands, mode, scratch_reg);
1896
1897 /* Now load from the memory location into our destination
1898 register. */
1899 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1900 emit_move_sequence (operands, mode, scratch_reg);
1901
1902 /* And add back in the constant part. */
1903 if (const_part != NULL_RTX)
1904 expand_inc (operand0, const_part);
1905
1906 return 1;
1907 }
1908
1909 if (flag_pic)
1910 {
1911 rtx temp;
1912
1913 if (reload_in_progress || reload_completed)
1914 {
1915 temp = scratch_reg ? scratch_reg : operand0;
1916 /* TEMP will hold an address and maybe the actual
1917 data. We want it in WORD_MODE regardless of what mode it
1918 was originally given to us. */
1919 temp = force_mode (word_mode, temp);
1920 }
1921 else
1922 temp = gen_reg_rtx (Pmode);
1923
1924 /* (const (plus (symbol) (const_int))) must be forced to
1925 memory during/after reload if the const_int will not fit
1926 in 14 bits. */
1927 if (GET_CODE (operand1) == CONST
1928 && GET_CODE (XEXP (operand1, 0)) == PLUS
1929 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1930 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1931 && (reload_completed || reload_in_progress)
1932 && flag_pic)
1933 {
1934 rtx const_mem = force_const_mem (mode, operand1);
1935 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1936 mode, temp);
1937 operands[1] = replace_equiv_address (const_mem, operands[1]);
1938 emit_move_sequence (operands, mode, temp);
1939 }
1940 else
1941 {
1942 operands[1] = legitimize_pic_address (operand1, mode, temp);
1943 if (REG_P (operand0) && REG_P (operands[1]))
1944 copy_reg_pointer (operand0, operands[1]);
1945 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1946 }
1947 }
1948 /* On the HPPA, references to data space are supposed to use dp,
1949 register 27, but showing it in the RTL inhibits various cse
1950 and loop optimizations. */
1951 else
1952 {
1953 rtx temp, set;
1954
1955 if (reload_in_progress || reload_completed)
1956 {
1957 temp = scratch_reg ? scratch_reg : operand0;
1958 /* TEMP will hold an address and maybe the actual
1959 data. We want it in WORD_MODE regardless of what mode it
1960 was originally given to us. */
1961 temp = force_mode (word_mode, temp);
1962 }
1963 else
1964 temp = gen_reg_rtx (mode);
1965
1966 /* Loading a SYMBOL_REF into a register makes that register
1967 safe to be used as the base in an indexed address.
1968
1969 Don't mark hard registers though. That loses. */
1970 if (GET_CODE (operand0) == REG
1971 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1972 mark_reg_pointer (operand0, BITS_PER_UNIT);
1973 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1974 mark_reg_pointer (temp, BITS_PER_UNIT);
1975
1976 if (ishighonly)
1977 set = gen_rtx_SET (mode, operand0, temp);
1978 else
1979 set = gen_rtx_SET (VOIDmode,
1980 operand0,
1981 gen_rtx_LO_SUM (mode, temp, operand1));
1982
1983 emit_insn (gen_rtx_SET (VOIDmode,
1984 temp,
1985 gen_rtx_HIGH (mode, operand1)));
1986 emit_insn (set);
1987
1988 }
1989 return 1;
1990 }
1991 else if (pa_tls_referenced_p (operand1))
1992 {
1993 rtx tmp = operand1;
1994 rtx addend = NULL;
1995
1996 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
1997 {
1998 addend = XEXP (XEXP (tmp, 0), 1);
1999 tmp = XEXP (XEXP (tmp, 0), 0);
2000 }
2001
2002 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2003 tmp = legitimize_tls_address (tmp);
2004 if (addend)
2005 {
2006 tmp = gen_rtx_PLUS (mode, tmp, addend);
2007 tmp = force_operand (tmp, operands[0]);
2008 }
2009 operands[1] = tmp;
2010 }
2011 else if (GET_CODE (operand1) != CONST_INT
2012 || !cint_ok_for_move (INTVAL (operand1)))
2013 {
2014 rtx insn, temp;
2015 rtx op1 = operand1;
2016 HOST_WIDE_INT value = 0;
2017 HOST_WIDE_INT insv = 0;
2018 int insert = 0;
2019
2020 if (GET_CODE (operand1) == CONST_INT)
2021 value = INTVAL (operand1);
2022
2023 if (TARGET_64BIT
2024 && GET_CODE (operand1) == CONST_INT
2025 && HOST_BITS_PER_WIDE_INT > 32
2026 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2027 {
2028 HOST_WIDE_INT nval;
2029
2030 /* Extract the low order 32 bits of the value and sign extend.
2031 If the new value is the same as the original value, we can
2032 can use the original value as-is. If the new value is
2033 different, we use it and insert the most-significant 32-bits
2034 of the original value into the final result. */
2035 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2036 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2037 if (value != nval)
2038 {
2039 #if HOST_BITS_PER_WIDE_INT > 32
2040 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2041 #endif
2042 insert = 1;
2043 value = nval;
2044 operand1 = GEN_INT (nval);
2045 }
2046 }
2047
2048 if (reload_in_progress || reload_completed)
2049 temp = scratch_reg ? scratch_reg : operand0;
2050 else
2051 temp = gen_reg_rtx (mode);
2052
2053 /* We don't directly split DImode constants on 32-bit targets
2054 because PLUS uses an 11-bit immediate and the insn sequence
2055 generated is not as efficient as the one using HIGH/LO_SUM. */
2056 if (GET_CODE (operand1) == CONST_INT
2057 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2058 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2059 && !insert)
2060 {
2061 /* Directly break constant into high and low parts. This
2062 provides better optimization opportunities because various
2063 passes recognize constants split with PLUS but not LO_SUM.
2064 We use a 14-bit signed low part except when the addition
2065 of 0x4000 to the high part might change the sign of the
2066 high part. */
2067 HOST_WIDE_INT low = value & 0x3fff;
2068 HOST_WIDE_INT high = value & ~ 0x3fff;
2069
2070 if (low >= 0x2000)
2071 {
2072 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2073 high += 0x2000;
2074 else
2075 high += 0x4000;
2076 }
2077
2078 low = value - high;
2079
2080 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2081 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2082 }
2083 else
2084 {
2085 emit_insn (gen_rtx_SET (VOIDmode, temp,
2086 gen_rtx_HIGH (mode, operand1)));
2087 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2088 }
2089
2090 insn = emit_move_insn (operands[0], operands[1]);
2091
2092 /* Now insert the most significant 32 bits of the value
2093 into the register. When we don't have a second register
2094 available, it could take up to nine instructions to load
2095 a 64-bit integer constant. Prior to reload, we force
2096 constants that would take more than three instructions
2097 to load to the constant pool. During and after reload,
2098 we have to handle all possible values. */
2099 if (insert)
2100 {
2101 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2102 register and the value to be inserted is outside the
2103 range that can be loaded with three depdi instructions. */
2104 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2105 {
2106 operand1 = GEN_INT (insv);
2107
2108 emit_insn (gen_rtx_SET (VOIDmode, temp,
2109 gen_rtx_HIGH (mode, operand1)));
2110 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2111 emit_insn (gen_insv (operand0, GEN_INT (32),
2112 const0_rtx, temp));
2113 }
2114 else
2115 {
2116 int len = 5, pos = 27;
2117
2118 /* Insert the bits using the depdi instruction. */
2119 while (pos >= 0)
2120 {
2121 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2122 HOST_WIDE_INT sign = v5 < 0;
2123
2124 /* Left extend the insertion. */
2125 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2126 while (pos > 0 && (insv & 1) == sign)
2127 {
2128 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2129 len += 1;
2130 pos -= 1;
2131 }
2132
2133 emit_insn (gen_insv (operand0, GEN_INT (len),
2134 GEN_INT (pos), GEN_INT (v5)));
2135
2136 len = pos > 0 && pos < 5 ? pos : 5;
2137 pos -= len;
2138 }
2139 }
2140 }
2141
2142 set_unique_reg_note (insn, REG_EQUAL, op1);
2143
2144 return 1;
2145 }
2146 }
2147 /* Now have insn-emit do whatever it normally does. */
2148 return 0;
2149 }
2150
2151 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2152 it will need a link/runtime reloc). */
2153
2154 int
2155 reloc_needed (tree exp)
2156 {
2157 int reloc = 0;
2158
2159 switch (TREE_CODE (exp))
2160 {
2161 case ADDR_EXPR:
2162 return 1;
2163
2164 case POINTER_PLUS_EXPR:
2165 case PLUS_EXPR:
2166 case MINUS_EXPR:
2167 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2168 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2169 break;
2170
2171 CASE_CONVERT:
2172 case NON_LVALUE_EXPR:
2173 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2174 break;
2175
2176 case CONSTRUCTOR:
2177 {
2178 tree value;
2179 unsigned HOST_WIDE_INT ix;
2180
2181 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2182 if (value)
2183 reloc |= reloc_needed (value);
2184 }
2185 break;
2186
2187 case ERROR_MARK:
2188 break;
2189
2190 default:
2191 break;
2192 }
2193 return reloc;
2194 }
2195
2196 /* Does operand (which is a symbolic_operand) live in text space?
2197 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2198 will be true. */
2199
2200 int
2201 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2202 {
2203 if (GET_CODE (operand) == CONST)
2204 operand = XEXP (XEXP (operand, 0), 0);
2205 if (flag_pic)
2206 {
2207 if (GET_CODE (operand) == SYMBOL_REF)
2208 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2209 }
2210 else
2211 {
2212 if (GET_CODE (operand) == SYMBOL_REF)
2213 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2214 }
2215 return 1;
2216 }
2217
2218 \f
2219 /* Return the best assembler insn template
2220 for moving operands[1] into operands[0] as a fullword. */
2221 const char *
2222 singlemove_string (rtx *operands)
2223 {
2224 HOST_WIDE_INT intval;
2225
2226 if (GET_CODE (operands[0]) == MEM)
2227 return "stw %r1,%0";
2228 if (GET_CODE (operands[1]) == MEM)
2229 return "ldw %1,%0";
2230 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2231 {
2232 long i;
2233 REAL_VALUE_TYPE d;
2234
2235 gcc_assert (GET_MODE (operands[1]) == SFmode);
2236
2237 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2238 bit pattern. */
2239 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2240 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2241
2242 operands[1] = GEN_INT (i);
2243 /* Fall through to CONST_INT case. */
2244 }
2245 if (GET_CODE (operands[1]) == CONST_INT)
2246 {
2247 intval = INTVAL (operands[1]);
2248
2249 if (VAL_14_BITS_P (intval))
2250 return "ldi %1,%0";
2251 else if ((intval & 0x7ff) == 0)
2252 return "ldil L'%1,%0";
2253 else if (zdepi_cint_p (intval))
2254 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2255 else
2256 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2257 }
2258 return "copy %1,%0";
2259 }
2260 \f
2261
2262 /* Compute position (in OP[1]) and width (in OP[2])
2263 useful for copying IMM to a register using the zdepi
2264 instructions. Store the immediate value to insert in OP[0]. */
2265 static void
2266 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2267 {
2268 int lsb, len;
2269
2270 /* Find the least significant set bit in IMM. */
2271 for (lsb = 0; lsb < 32; lsb++)
2272 {
2273 if ((imm & 1) != 0)
2274 break;
2275 imm >>= 1;
2276 }
2277
2278 /* Choose variants based on *sign* of the 5-bit field. */
2279 if ((imm & 0x10) == 0)
2280 len = (lsb <= 28) ? 4 : 32 - lsb;
2281 else
2282 {
2283 /* Find the width of the bitstring in IMM. */
2284 for (len = 5; len < 32 - lsb; len++)
2285 {
2286 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2287 break;
2288 }
2289
2290 /* Sign extend IMM as a 5-bit value. */
2291 imm = (imm & 0xf) - 0x10;
2292 }
2293
2294 op[0] = imm;
2295 op[1] = 31 - lsb;
2296 op[2] = len;
2297 }
2298
2299 /* Compute position (in OP[1]) and width (in OP[2])
2300 useful for copying IMM to a register using the depdi,z
2301 instructions. Store the immediate value to insert in OP[0]. */
2302 void
2303 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2304 {
2305 int lsb, len, maxlen;
2306
2307 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2308
2309 /* Find the least significant set bit in IMM. */
2310 for (lsb = 0; lsb < maxlen; lsb++)
2311 {
2312 if ((imm & 1) != 0)
2313 break;
2314 imm >>= 1;
2315 }
2316
2317 /* Choose variants based on *sign* of the 5-bit field. */
2318 if ((imm & 0x10) == 0)
2319 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2320 else
2321 {
2322 /* Find the width of the bitstring in IMM. */
2323 for (len = 5; len < maxlen - lsb; len++)
2324 {
2325 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2326 break;
2327 }
2328
2329 /* Extend length if host is narrow and IMM is negative. */
2330 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2331 len += 32;
2332
2333 /* Sign extend IMM as a 5-bit value. */
2334 imm = (imm & 0xf) - 0x10;
2335 }
2336
2337 op[0] = imm;
2338 op[1] = 63 - lsb;
2339 op[2] = len;
2340 }
2341
2342 /* Output assembler code to perform a doubleword move insn
2343 with operands OPERANDS. */
2344
2345 const char *
2346 output_move_double (rtx *operands)
2347 {
2348 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2349 rtx latehalf[2];
2350 rtx addreg0 = 0, addreg1 = 0;
2351
2352 /* First classify both operands. */
2353
2354 if (REG_P (operands[0]))
2355 optype0 = REGOP;
2356 else if (offsettable_memref_p (operands[0]))
2357 optype0 = OFFSOP;
2358 else if (GET_CODE (operands[0]) == MEM)
2359 optype0 = MEMOP;
2360 else
2361 optype0 = RNDOP;
2362
2363 if (REG_P (operands[1]))
2364 optype1 = REGOP;
2365 else if (CONSTANT_P (operands[1]))
2366 optype1 = CNSTOP;
2367 else if (offsettable_memref_p (operands[1]))
2368 optype1 = OFFSOP;
2369 else if (GET_CODE (operands[1]) == MEM)
2370 optype1 = MEMOP;
2371 else
2372 optype1 = RNDOP;
2373
2374 /* Check for the cases that the operand constraints are not
2375 supposed to allow to happen. */
2376 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2377
2378 /* Handle copies between general and floating registers. */
2379
2380 if (optype0 == REGOP && optype1 == REGOP
2381 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2382 {
2383 if (FP_REG_P (operands[0]))
2384 {
2385 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2386 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2387 return "{fldds|fldd} -16(%%sp),%0";
2388 }
2389 else
2390 {
2391 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2392 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2393 return "{ldws|ldw} -12(%%sp),%R0";
2394 }
2395 }
2396
2397 /* Handle auto decrementing and incrementing loads and stores
2398 specifically, since the structure of the function doesn't work
2399 for them without major modification. Do it better when we learn
2400 this port about the general inc/dec addressing of PA.
2401 (This was written by tege. Chide him if it doesn't work.) */
2402
2403 if (optype0 == MEMOP)
2404 {
2405 /* We have to output the address syntax ourselves, since print_operand
2406 doesn't deal with the addresses we want to use. Fix this later. */
2407
2408 rtx addr = XEXP (operands[0], 0);
2409 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2410 {
2411 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2412
2413 operands[0] = XEXP (addr, 0);
2414 gcc_assert (GET_CODE (operands[1]) == REG
2415 && GET_CODE (operands[0]) == REG);
2416
2417 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2418
2419 /* No overlap between high target register and address
2420 register. (We do this in a non-obvious way to
2421 save a register file writeback) */
2422 if (GET_CODE (addr) == POST_INC)
2423 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2424 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2425 }
2426 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2427 {
2428 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2429
2430 operands[0] = XEXP (addr, 0);
2431 gcc_assert (GET_CODE (operands[1]) == REG
2432 && GET_CODE (operands[0]) == REG);
2433
2434 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2435 /* No overlap between high target register and address
2436 register. (We do this in a non-obvious way to save a
2437 register file writeback) */
2438 if (GET_CODE (addr) == PRE_INC)
2439 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2440 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2441 }
2442 }
2443 if (optype1 == MEMOP)
2444 {
2445 /* We have to output the address syntax ourselves, since print_operand
2446 doesn't deal with the addresses we want to use. Fix this later. */
2447
2448 rtx addr = XEXP (operands[1], 0);
2449 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2450 {
2451 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2452
2453 operands[1] = XEXP (addr, 0);
2454 gcc_assert (GET_CODE (operands[0]) == REG
2455 && GET_CODE (operands[1]) == REG);
2456
2457 if (!reg_overlap_mentioned_p (high_reg, addr))
2458 {
2459 /* No overlap between high target register and address
2460 register. (We do this in a non-obvious way to
2461 save a register file writeback) */
2462 if (GET_CODE (addr) == POST_INC)
2463 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2464 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2465 }
2466 else
2467 {
2468 /* This is an undefined situation. We should load into the
2469 address register *and* update that register. Probably
2470 we don't need to handle this at all. */
2471 if (GET_CODE (addr) == POST_INC)
2472 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2473 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2474 }
2475 }
2476 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2477 {
2478 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2479
2480 operands[1] = XEXP (addr, 0);
2481 gcc_assert (GET_CODE (operands[0]) == REG
2482 && GET_CODE (operands[1]) == REG);
2483
2484 if (!reg_overlap_mentioned_p (high_reg, addr))
2485 {
2486 /* No overlap between high target register and address
2487 register. (We do this in a non-obvious way to
2488 save a register file writeback) */
2489 if (GET_CODE (addr) == PRE_INC)
2490 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2491 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2492 }
2493 else
2494 {
2495 /* This is an undefined situation. We should load into the
2496 address register *and* update that register. Probably
2497 we don't need to handle this at all. */
2498 if (GET_CODE (addr) == PRE_INC)
2499 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2500 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2501 }
2502 }
2503 else if (GET_CODE (addr) == PLUS
2504 && GET_CODE (XEXP (addr, 0)) == MULT)
2505 {
2506 rtx xoperands[4];
2507 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2508
2509 if (!reg_overlap_mentioned_p (high_reg, addr))
2510 {
2511 xoperands[0] = high_reg;
2512 xoperands[1] = XEXP (addr, 1);
2513 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2514 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2515 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2516 xoperands);
2517 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2518 }
2519 else
2520 {
2521 xoperands[0] = high_reg;
2522 xoperands[1] = XEXP (addr, 1);
2523 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2524 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2525 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2526 xoperands);
2527 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2528 }
2529 }
2530 }
2531
2532 /* If an operand is an unoffsettable memory ref, find a register
2533 we can increment temporarily to make it refer to the second word. */
2534
2535 if (optype0 == MEMOP)
2536 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2537
2538 if (optype1 == MEMOP)
2539 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2540
2541 /* Ok, we can do one word at a time.
2542 Normally we do the low-numbered word first.
2543
2544 In either case, set up in LATEHALF the operands to use
2545 for the high-numbered word and in some cases alter the
2546 operands in OPERANDS to be suitable for the low-numbered word. */
2547
2548 if (optype0 == REGOP)
2549 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2550 else if (optype0 == OFFSOP)
2551 latehalf[0] = adjust_address (operands[0], SImode, 4);
2552 else
2553 latehalf[0] = operands[0];
2554
2555 if (optype1 == REGOP)
2556 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2557 else if (optype1 == OFFSOP)
2558 latehalf[1] = adjust_address (operands[1], SImode, 4);
2559 else if (optype1 == CNSTOP)
2560 split_double (operands[1], &operands[1], &latehalf[1]);
2561 else
2562 latehalf[1] = operands[1];
2563
2564 /* If the first move would clobber the source of the second one,
2565 do them in the other order.
2566
2567 This can happen in two cases:
2568
2569 mem -> register where the first half of the destination register
2570 is the same register used in the memory's address. Reload
2571 can create such insns.
2572
2573 mem in this case will be either register indirect or register
2574 indirect plus a valid offset.
2575
2576 register -> register move where REGNO(dst) == REGNO(src + 1)
2577 someone (Tim/Tege?) claimed this can happen for parameter loads.
2578
2579 Handle mem -> register case first. */
2580 if (optype0 == REGOP
2581 && (optype1 == MEMOP || optype1 == OFFSOP)
2582 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2583 operands[1], 0))
2584 {
2585 /* Do the late half first. */
2586 if (addreg1)
2587 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2588 output_asm_insn (singlemove_string (latehalf), latehalf);
2589
2590 /* Then clobber. */
2591 if (addreg1)
2592 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2593 return singlemove_string (operands);
2594 }
2595
2596 /* Now handle register -> register case. */
2597 if (optype0 == REGOP && optype1 == REGOP
2598 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2599 {
2600 output_asm_insn (singlemove_string (latehalf), latehalf);
2601 return singlemove_string (operands);
2602 }
2603
2604 /* Normal case: do the two words, low-numbered first. */
2605
2606 output_asm_insn (singlemove_string (operands), operands);
2607
2608 /* Make any unoffsettable addresses point at high-numbered word. */
2609 if (addreg0)
2610 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2611 if (addreg1)
2612 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2613
2614 /* Do that word. */
2615 output_asm_insn (singlemove_string (latehalf), latehalf);
2616
2617 /* Undo the adds we just did. */
2618 if (addreg0)
2619 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2620 if (addreg1)
2621 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2622
2623 return "";
2624 }
2625 \f
2626 const char *
2627 output_fp_move_double (rtx *operands)
2628 {
2629 if (FP_REG_P (operands[0]))
2630 {
2631 if (FP_REG_P (operands[1])
2632 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2633 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2634 else
2635 output_asm_insn ("fldd%F1 %1,%0", operands);
2636 }
2637 else if (FP_REG_P (operands[1]))
2638 {
2639 output_asm_insn ("fstd%F0 %1,%0", operands);
2640 }
2641 else
2642 {
2643 rtx xoperands[2];
2644
2645 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2646
2647 /* This is a pain. You have to be prepared to deal with an
2648 arbitrary address here including pre/post increment/decrement.
2649
2650 so avoid this in the MD. */
2651 gcc_assert (GET_CODE (operands[0]) == REG);
2652
2653 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2654 xoperands[0] = operands[0];
2655 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2656 }
2657 return "";
2658 }
2659 \f
2660 /* Return a REG that occurs in ADDR with coefficient 1.
2661 ADDR can be effectively incremented by incrementing REG. */
2662
2663 static rtx
2664 find_addr_reg (rtx addr)
2665 {
2666 while (GET_CODE (addr) == PLUS)
2667 {
2668 if (GET_CODE (XEXP (addr, 0)) == REG)
2669 addr = XEXP (addr, 0);
2670 else if (GET_CODE (XEXP (addr, 1)) == REG)
2671 addr = XEXP (addr, 1);
2672 else if (CONSTANT_P (XEXP (addr, 0)))
2673 addr = XEXP (addr, 1);
2674 else if (CONSTANT_P (XEXP (addr, 1)))
2675 addr = XEXP (addr, 0);
2676 else
2677 gcc_unreachable ();
2678 }
2679 gcc_assert (GET_CODE (addr) == REG);
2680 return addr;
2681 }
2682
2683 /* Emit code to perform a block move.
2684
2685 OPERANDS[0] is the destination pointer as a REG, clobbered.
2686 OPERANDS[1] is the source pointer as a REG, clobbered.
2687 OPERANDS[2] is a register for temporary storage.
2688 OPERANDS[3] is a register for temporary storage.
2689 OPERANDS[4] is the size as a CONST_INT
2690 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2691 OPERANDS[6] is another temporary register. */
2692
2693 const char *
2694 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2695 {
2696 int align = INTVAL (operands[5]);
2697 unsigned long n_bytes = INTVAL (operands[4]);
2698
2699 /* We can't move more than a word at a time because the PA
2700 has no longer integer move insns. (Could use fp mem ops?) */
2701 if (align > (TARGET_64BIT ? 8 : 4))
2702 align = (TARGET_64BIT ? 8 : 4);
2703
2704 /* Note that we know each loop below will execute at least twice
2705 (else we would have open-coded the copy). */
2706 switch (align)
2707 {
2708 case 8:
2709 /* Pre-adjust the loop counter. */
2710 operands[4] = GEN_INT (n_bytes - 16);
2711 output_asm_insn ("ldi %4,%2", operands);
2712
2713 /* Copying loop. */
2714 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2715 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2716 output_asm_insn ("std,ma %3,8(%0)", operands);
2717 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2718 output_asm_insn ("std,ma %6,8(%0)", operands);
2719
2720 /* Handle the residual. There could be up to 7 bytes of
2721 residual to copy! */
2722 if (n_bytes % 16 != 0)
2723 {
2724 operands[4] = GEN_INT (n_bytes % 8);
2725 if (n_bytes % 16 >= 8)
2726 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2727 if (n_bytes % 8 != 0)
2728 output_asm_insn ("ldd 0(%1),%6", operands);
2729 if (n_bytes % 16 >= 8)
2730 output_asm_insn ("std,ma %3,8(%0)", operands);
2731 if (n_bytes % 8 != 0)
2732 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2733 }
2734 return "";
2735
2736 case 4:
2737 /* Pre-adjust the loop counter. */
2738 operands[4] = GEN_INT (n_bytes - 8);
2739 output_asm_insn ("ldi %4,%2", operands);
2740
2741 /* Copying loop. */
2742 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2743 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2744 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2745 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2746 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2747
2748 /* Handle the residual. There could be up to 7 bytes of
2749 residual to copy! */
2750 if (n_bytes % 8 != 0)
2751 {
2752 operands[4] = GEN_INT (n_bytes % 4);
2753 if (n_bytes % 8 >= 4)
2754 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2755 if (n_bytes % 4 != 0)
2756 output_asm_insn ("ldw 0(%1),%6", operands);
2757 if (n_bytes % 8 >= 4)
2758 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2759 if (n_bytes % 4 != 0)
2760 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2761 }
2762 return "";
2763
2764 case 2:
2765 /* Pre-adjust the loop counter. */
2766 operands[4] = GEN_INT (n_bytes - 4);
2767 output_asm_insn ("ldi %4,%2", operands);
2768
2769 /* Copying loop. */
2770 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2771 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2772 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2773 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2774 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2775
2776 /* Handle the residual. */
2777 if (n_bytes % 4 != 0)
2778 {
2779 if (n_bytes % 4 >= 2)
2780 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2781 if (n_bytes % 2 != 0)
2782 output_asm_insn ("ldb 0(%1),%6", operands);
2783 if (n_bytes % 4 >= 2)
2784 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2785 if (n_bytes % 2 != 0)
2786 output_asm_insn ("stb %6,0(%0)", operands);
2787 }
2788 return "";
2789
2790 case 1:
2791 /* Pre-adjust the loop counter. */
2792 operands[4] = GEN_INT (n_bytes - 2);
2793 output_asm_insn ("ldi %4,%2", operands);
2794
2795 /* Copying loop. */
2796 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2797 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2798 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2799 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2800 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2801
2802 /* Handle the residual. */
2803 if (n_bytes % 2 != 0)
2804 {
2805 output_asm_insn ("ldb 0(%1),%3", operands);
2806 output_asm_insn ("stb %3,0(%0)", operands);
2807 }
2808 return "";
2809
2810 default:
2811 gcc_unreachable ();
2812 }
2813 }
2814
2815 /* Count the number of insns necessary to handle this block move.
2816
2817 Basic structure is the same as emit_block_move, except that we
2818 count insns rather than emit them. */
2819
2820 static int
2821 compute_movmem_length (rtx insn)
2822 {
2823 rtx pat = PATTERN (insn);
2824 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2825 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2826 unsigned int n_insns = 0;
2827
2828 /* We can't move more than four bytes at a time because the PA
2829 has no longer integer move insns. (Could use fp mem ops?) */
2830 if (align > (TARGET_64BIT ? 8 : 4))
2831 align = (TARGET_64BIT ? 8 : 4);
2832
2833 /* The basic copying loop. */
2834 n_insns = 6;
2835
2836 /* Residuals. */
2837 if (n_bytes % (2 * align) != 0)
2838 {
2839 if ((n_bytes % (2 * align)) >= align)
2840 n_insns += 2;
2841
2842 if ((n_bytes % align) != 0)
2843 n_insns += 2;
2844 }
2845
2846 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2847 return n_insns * 4;
2848 }
2849
2850 /* Emit code to perform a block clear.
2851
2852 OPERANDS[0] is the destination pointer as a REG, clobbered.
2853 OPERANDS[1] is a register for temporary storage.
2854 OPERANDS[2] is the size as a CONST_INT
2855 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2856
2857 const char *
2858 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2859 {
2860 int align = INTVAL (operands[3]);
2861 unsigned long n_bytes = INTVAL (operands[2]);
2862
2863 /* We can't clear more than a word at a time because the PA
2864 has no longer integer move insns. */
2865 if (align > (TARGET_64BIT ? 8 : 4))
2866 align = (TARGET_64BIT ? 8 : 4);
2867
2868 /* Note that we know each loop below will execute at least twice
2869 (else we would have open-coded the copy). */
2870 switch (align)
2871 {
2872 case 8:
2873 /* Pre-adjust the loop counter. */
2874 operands[2] = GEN_INT (n_bytes - 16);
2875 output_asm_insn ("ldi %2,%1", operands);
2876
2877 /* Loop. */
2878 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2879 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2880 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2881
2882 /* Handle the residual. There could be up to 7 bytes of
2883 residual to copy! */
2884 if (n_bytes % 16 != 0)
2885 {
2886 operands[2] = GEN_INT (n_bytes % 8);
2887 if (n_bytes % 16 >= 8)
2888 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2889 if (n_bytes % 8 != 0)
2890 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2891 }
2892 return "";
2893
2894 case 4:
2895 /* Pre-adjust the loop counter. */
2896 operands[2] = GEN_INT (n_bytes - 8);
2897 output_asm_insn ("ldi %2,%1", operands);
2898
2899 /* Loop. */
2900 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2901 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2902 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2903
2904 /* Handle the residual. There could be up to 7 bytes of
2905 residual to copy! */
2906 if (n_bytes % 8 != 0)
2907 {
2908 operands[2] = GEN_INT (n_bytes % 4);
2909 if (n_bytes % 8 >= 4)
2910 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2911 if (n_bytes % 4 != 0)
2912 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2913 }
2914 return "";
2915
2916 case 2:
2917 /* Pre-adjust the loop counter. */
2918 operands[2] = GEN_INT (n_bytes - 4);
2919 output_asm_insn ("ldi %2,%1", operands);
2920
2921 /* Loop. */
2922 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2923 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2924 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2925
2926 /* Handle the residual. */
2927 if (n_bytes % 4 != 0)
2928 {
2929 if (n_bytes % 4 >= 2)
2930 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2931 if (n_bytes % 2 != 0)
2932 output_asm_insn ("stb %%r0,0(%0)", operands);
2933 }
2934 return "";
2935
2936 case 1:
2937 /* Pre-adjust the loop counter. */
2938 operands[2] = GEN_INT (n_bytes - 2);
2939 output_asm_insn ("ldi %2,%1", operands);
2940
2941 /* Loop. */
2942 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2943 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2944 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2945
2946 /* Handle the residual. */
2947 if (n_bytes % 2 != 0)
2948 output_asm_insn ("stb %%r0,0(%0)", operands);
2949
2950 return "";
2951
2952 default:
2953 gcc_unreachable ();
2954 }
2955 }
2956
2957 /* Count the number of insns necessary to handle this block move.
2958
2959 Basic structure is the same as emit_block_move, except that we
2960 count insns rather than emit them. */
2961
2962 static int
2963 compute_clrmem_length (rtx insn)
2964 {
2965 rtx pat = PATTERN (insn);
2966 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2967 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2968 unsigned int n_insns = 0;
2969
2970 /* We can't clear more than a word at a time because the PA
2971 has no longer integer move insns. */
2972 if (align > (TARGET_64BIT ? 8 : 4))
2973 align = (TARGET_64BIT ? 8 : 4);
2974
2975 /* The basic loop. */
2976 n_insns = 4;
2977
2978 /* Residuals. */
2979 if (n_bytes % (2 * align) != 0)
2980 {
2981 if ((n_bytes % (2 * align)) >= align)
2982 n_insns++;
2983
2984 if ((n_bytes % align) != 0)
2985 n_insns++;
2986 }
2987
2988 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2989 return n_insns * 4;
2990 }
2991 \f
2992
2993 const char *
2994 output_and (rtx *operands)
2995 {
2996 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2997 {
2998 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2999 int ls0, ls1, ms0, p, len;
3000
3001 for (ls0 = 0; ls0 < 32; ls0++)
3002 if ((mask & (1 << ls0)) == 0)
3003 break;
3004
3005 for (ls1 = ls0; ls1 < 32; ls1++)
3006 if ((mask & (1 << ls1)) != 0)
3007 break;
3008
3009 for (ms0 = ls1; ms0 < 32; ms0++)
3010 if ((mask & (1 << ms0)) == 0)
3011 break;
3012
3013 gcc_assert (ms0 == 32);
3014
3015 if (ls1 == 32)
3016 {
3017 len = ls0;
3018
3019 gcc_assert (len);
3020
3021 operands[2] = GEN_INT (len);
3022 return "{extru|extrw,u} %1,31,%2,%0";
3023 }
3024 else
3025 {
3026 /* We could use this `depi' for the case above as well, but `depi'
3027 requires one more register file access than an `extru'. */
3028
3029 p = 31 - ls0;
3030 len = ls1 - ls0;
3031
3032 operands[2] = GEN_INT (p);
3033 operands[3] = GEN_INT (len);
3034 return "{depi|depwi} 0,%2,%3,%0";
3035 }
3036 }
3037 else
3038 return "and %1,%2,%0";
3039 }
3040
3041 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3042 storing the result in operands[0]. */
3043 const char *
3044 output_64bit_and (rtx *operands)
3045 {
3046 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3047 {
3048 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3049 int ls0, ls1, ms0, p, len;
3050
3051 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3052 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3053 break;
3054
3055 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3056 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3057 break;
3058
3059 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3060 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3061 break;
3062
3063 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3064
3065 if (ls1 == HOST_BITS_PER_WIDE_INT)
3066 {
3067 len = ls0;
3068
3069 gcc_assert (len);
3070
3071 operands[2] = GEN_INT (len);
3072 return "extrd,u %1,63,%2,%0";
3073 }
3074 else
3075 {
3076 /* We could use this `depi' for the case above as well, but `depi'
3077 requires one more register file access than an `extru'. */
3078
3079 p = 63 - ls0;
3080 len = ls1 - ls0;
3081
3082 operands[2] = GEN_INT (p);
3083 operands[3] = GEN_INT (len);
3084 return "depdi 0,%2,%3,%0";
3085 }
3086 }
3087 else
3088 return "and %1,%2,%0";
3089 }
3090
3091 const char *
3092 output_ior (rtx *operands)
3093 {
3094 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3095 int bs0, bs1, p, len;
3096
3097 if (INTVAL (operands[2]) == 0)
3098 return "copy %1,%0";
3099
3100 for (bs0 = 0; bs0 < 32; bs0++)
3101 if ((mask & (1 << bs0)) != 0)
3102 break;
3103
3104 for (bs1 = bs0; bs1 < 32; bs1++)
3105 if ((mask & (1 << bs1)) == 0)
3106 break;
3107
3108 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3109
3110 p = 31 - bs0;
3111 len = bs1 - bs0;
3112
3113 operands[2] = GEN_INT (p);
3114 operands[3] = GEN_INT (len);
3115 return "{depi|depwi} -1,%2,%3,%0";
3116 }
3117
3118 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3119 storing the result in operands[0]. */
3120 const char *
3121 output_64bit_ior (rtx *operands)
3122 {
3123 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3124 int bs0, bs1, p, len;
3125
3126 if (INTVAL (operands[2]) == 0)
3127 return "copy %1,%0";
3128
3129 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3130 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3131 break;
3132
3133 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3134 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3135 break;
3136
3137 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3138 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3139
3140 p = 63 - bs0;
3141 len = bs1 - bs0;
3142
3143 operands[2] = GEN_INT (p);
3144 operands[3] = GEN_INT (len);
3145 return "depdi -1,%2,%3,%0";
3146 }
3147 \f
3148 /* Target hook for assembling integer objects. This code handles
3149 aligned SI and DI integers specially since function references
3150 must be preceded by P%. */
3151
3152 static bool
3153 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3154 {
3155 if (size == UNITS_PER_WORD
3156 && aligned_p
3157 && function_label_operand (x, VOIDmode))
3158 {
3159 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3160 output_addr_const (asm_out_file, x);
3161 fputc ('\n', asm_out_file);
3162 return true;
3163 }
3164 return default_assemble_integer (x, size, aligned_p);
3165 }
3166 \f
3167 /* Output an ascii string. */
3168 void
3169 output_ascii (FILE *file, const char *p, int size)
3170 {
3171 int i;
3172 int chars_output;
3173 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3174
3175 /* The HP assembler can only take strings of 256 characters at one
3176 time. This is a limitation on input line length, *not* the
3177 length of the string. Sigh. Even worse, it seems that the
3178 restriction is in number of input characters (see \xnn &
3179 \whatever). So we have to do this very carefully. */
3180
3181 fputs ("\t.STRING \"", file);
3182
3183 chars_output = 0;
3184 for (i = 0; i < size; i += 4)
3185 {
3186 int co = 0;
3187 int io = 0;
3188 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3189 {
3190 register unsigned int c = (unsigned char) p[i + io];
3191
3192 if (c == '\"' || c == '\\')
3193 partial_output[co++] = '\\';
3194 if (c >= ' ' && c < 0177)
3195 partial_output[co++] = c;
3196 else
3197 {
3198 unsigned int hexd;
3199 partial_output[co++] = '\\';
3200 partial_output[co++] = 'x';
3201 hexd = c / 16 - 0 + '0';
3202 if (hexd > '9')
3203 hexd -= '9' - 'a' + 1;
3204 partial_output[co++] = hexd;
3205 hexd = c % 16 - 0 + '0';
3206 if (hexd > '9')
3207 hexd -= '9' - 'a' + 1;
3208 partial_output[co++] = hexd;
3209 }
3210 }
3211 if (chars_output + co > 243)
3212 {
3213 fputs ("\"\n\t.STRING \"", file);
3214 chars_output = 0;
3215 }
3216 fwrite (partial_output, 1, (size_t) co, file);
3217 chars_output += co;
3218 co = 0;
3219 }
3220 fputs ("\"\n", file);
3221 }
3222
3223 /* Try to rewrite floating point comparisons & branches to avoid
3224 useless add,tr insns.
3225
3226 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3227 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3228 first attempt to remove useless add,tr insns. It is zero
3229 for the second pass as reorg sometimes leaves bogus REG_DEAD
3230 notes lying around.
3231
3232 When CHECK_NOTES is zero we can only eliminate add,tr insns
3233 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3234 instructions. */
3235 static void
3236 remove_useless_addtr_insns (int check_notes)
3237 {
3238 rtx insn;
3239 static int pass = 0;
3240
3241 /* This is fairly cheap, so always run it when optimizing. */
3242 if (optimize > 0)
3243 {
3244 int fcmp_count = 0;
3245 int fbranch_count = 0;
3246
3247 /* Walk all the insns in this function looking for fcmp & fbranch
3248 instructions. Keep track of how many of each we find. */
3249 for (insn = get_insns (); insn; insn = next_insn (insn))
3250 {
3251 rtx tmp;
3252
3253 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3254 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3255 continue;
3256
3257 tmp = PATTERN (insn);
3258
3259 /* It must be a set. */
3260 if (GET_CODE (tmp) != SET)
3261 continue;
3262
3263 /* If the destination is CCFP, then we've found an fcmp insn. */
3264 tmp = SET_DEST (tmp);
3265 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3266 {
3267 fcmp_count++;
3268 continue;
3269 }
3270
3271 tmp = PATTERN (insn);
3272 /* If this is an fbranch instruction, bump the fbranch counter. */
3273 if (GET_CODE (tmp) == SET
3274 && SET_DEST (tmp) == pc_rtx
3275 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3276 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3277 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3278 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3279 {
3280 fbranch_count++;
3281 continue;
3282 }
3283 }
3284
3285
3286 /* Find all floating point compare + branch insns. If possible,
3287 reverse the comparison & the branch to avoid add,tr insns. */
3288 for (insn = get_insns (); insn; insn = next_insn (insn))
3289 {
3290 rtx tmp, next;
3291
3292 /* Ignore anything that isn't an INSN. */
3293 if (GET_CODE (insn) != INSN)
3294 continue;
3295
3296 tmp = PATTERN (insn);
3297
3298 /* It must be a set. */
3299 if (GET_CODE (tmp) != SET)
3300 continue;
3301
3302 /* The destination must be CCFP, which is register zero. */
3303 tmp = SET_DEST (tmp);
3304 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3305 continue;
3306
3307 /* INSN should be a set of CCFP.
3308
3309 See if the result of this insn is used in a reversed FP
3310 conditional branch. If so, reverse our condition and
3311 the branch. Doing so avoids useless add,tr insns. */
3312 next = next_insn (insn);
3313 while (next)
3314 {
3315 /* Jumps, calls and labels stop our search. */
3316 if (GET_CODE (next) == JUMP_INSN
3317 || GET_CODE (next) == CALL_INSN
3318 || GET_CODE (next) == CODE_LABEL)
3319 break;
3320
3321 /* As does another fcmp insn. */
3322 if (GET_CODE (next) == INSN
3323 && GET_CODE (PATTERN (next)) == SET
3324 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3325 && REGNO (SET_DEST (PATTERN (next))) == 0)
3326 break;
3327
3328 next = next_insn (next);
3329 }
3330
3331 /* Is NEXT_INSN a branch? */
3332 if (next
3333 && GET_CODE (next) == JUMP_INSN)
3334 {
3335 rtx pattern = PATTERN (next);
3336
3337 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3338 and CCFP dies, then reverse our conditional and the branch
3339 to avoid the add,tr. */
3340 if (GET_CODE (pattern) == SET
3341 && SET_DEST (pattern) == pc_rtx
3342 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3343 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3344 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3345 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3346 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3347 && (fcmp_count == fbranch_count
3348 || (check_notes
3349 && find_regno_note (next, REG_DEAD, 0))))
3350 {
3351 /* Reverse the branch. */
3352 tmp = XEXP (SET_SRC (pattern), 1);
3353 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3354 XEXP (SET_SRC (pattern), 2) = tmp;
3355 INSN_CODE (next) = -1;
3356
3357 /* Reverse our condition. */
3358 tmp = PATTERN (insn);
3359 PUT_CODE (XEXP (tmp, 1),
3360 (reverse_condition_maybe_unordered
3361 (GET_CODE (XEXP (tmp, 1)))));
3362 }
3363 }
3364 }
3365 }
3366
3367 pass = !pass;
3368
3369 }
3370 \f
3371 /* You may have trouble believing this, but this is the 32 bit HP-PA
3372 stack layout. Wow.
3373
3374 Offset Contents
3375
3376 Variable arguments (optional; any number may be allocated)
3377
3378 SP-(4*(N+9)) arg word N
3379 : :
3380 SP-56 arg word 5
3381 SP-52 arg word 4
3382
3383 Fixed arguments (must be allocated; may remain unused)
3384
3385 SP-48 arg word 3
3386 SP-44 arg word 2
3387 SP-40 arg word 1
3388 SP-36 arg word 0
3389
3390 Frame Marker
3391
3392 SP-32 External Data Pointer (DP)
3393 SP-28 External sr4
3394 SP-24 External/stub RP (RP')
3395 SP-20 Current RP
3396 SP-16 Static Link
3397 SP-12 Clean up
3398 SP-8 Calling Stub RP (RP'')
3399 SP-4 Previous SP
3400
3401 Top of Frame
3402
3403 SP-0 Stack Pointer (points to next available address)
3404
3405 */
3406
3407 /* This function saves registers as follows. Registers marked with ' are
3408 this function's registers (as opposed to the previous function's).
3409 If a frame_pointer isn't needed, r4 is saved as a general register;
3410 the space for the frame pointer is still allocated, though, to keep
3411 things simple.
3412
3413
3414 Top of Frame
3415
3416 SP (FP') Previous FP
3417 SP + 4 Alignment filler (sigh)
3418 SP + 8 Space for locals reserved here.
3419 .
3420 .
3421 .
3422 SP + n All call saved register used.
3423 .
3424 .
3425 .
3426 SP + o All call saved fp registers used.
3427 .
3428 .
3429 .
3430 SP + p (SP') points to next available address.
3431
3432 */
3433
3434 /* Global variables set by output_function_prologue(). */
3435 /* Size of frame. Need to know this to emit return insns from
3436 leaf procedures. */
3437 static HOST_WIDE_INT actual_fsize, local_fsize;
3438 static int save_fregs;
3439
3440 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3441 Handle case where DISP > 8k by using the add_high_const patterns.
3442
3443 Note in DISP > 8k case, we will leave the high part of the address
3444 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3445
3446 static void
3447 store_reg (int reg, HOST_WIDE_INT disp, int base)
3448 {
3449 rtx insn, dest, src, basereg;
3450
3451 src = gen_rtx_REG (word_mode, reg);
3452 basereg = gen_rtx_REG (Pmode, base);
3453 if (VAL_14_BITS_P (disp))
3454 {
3455 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3456 insn = emit_move_insn (dest, src);
3457 }
3458 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3459 {
3460 rtx delta = GEN_INT (disp);
3461 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3462
3463 emit_move_insn (tmpreg, delta);
3464 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3465 if (DO_FRAME_NOTES)
3466 {
3467 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3468 gen_rtx_SET (VOIDmode, tmpreg,
3469 gen_rtx_PLUS (Pmode, basereg, delta)));
3470 RTX_FRAME_RELATED_P (insn) = 1;
3471 }
3472 dest = gen_rtx_MEM (word_mode, tmpreg);
3473 insn = emit_move_insn (dest, src);
3474 }
3475 else
3476 {
3477 rtx delta = GEN_INT (disp);
3478 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3479 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3480
3481 emit_move_insn (tmpreg, high);
3482 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3483 insn = emit_move_insn (dest, src);
3484 if (DO_FRAME_NOTES)
3485 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3486 gen_rtx_SET (VOIDmode,
3487 gen_rtx_MEM (word_mode,
3488 gen_rtx_PLUS (word_mode,
3489 basereg,
3490 delta)),
3491 src));
3492 }
3493
3494 if (DO_FRAME_NOTES)
3495 RTX_FRAME_RELATED_P (insn) = 1;
3496 }
3497
3498 /* Emit RTL to store REG at the memory location specified by BASE and then
3499 add MOD to BASE. MOD must be <= 8k. */
3500
3501 static void
3502 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3503 {
3504 rtx insn, basereg, srcreg, delta;
3505
3506 gcc_assert (VAL_14_BITS_P (mod));
3507
3508 basereg = gen_rtx_REG (Pmode, base);
3509 srcreg = gen_rtx_REG (word_mode, reg);
3510 delta = GEN_INT (mod);
3511
3512 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3513 if (DO_FRAME_NOTES)
3514 {
3515 RTX_FRAME_RELATED_P (insn) = 1;
3516
3517 /* RTX_FRAME_RELATED_P must be set on each frame related set
3518 in a parallel with more than one element. */
3519 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3520 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3521 }
3522 }
3523
3524 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3525 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3526 whether to add a frame note or not.
3527
3528 In the DISP > 8k case, we leave the high part of the address in %r1.
3529 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3530
3531 static void
3532 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3533 {
3534 rtx insn;
3535
3536 if (VAL_14_BITS_P (disp))
3537 {
3538 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3539 plus_constant (gen_rtx_REG (Pmode, base), disp));
3540 }
3541 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3542 {
3543 rtx basereg = gen_rtx_REG (Pmode, base);
3544 rtx delta = GEN_INT (disp);
3545 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3546
3547 emit_move_insn (tmpreg, delta);
3548 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3549 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3550 if (DO_FRAME_NOTES)
3551 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3552 gen_rtx_SET (VOIDmode, tmpreg,
3553 gen_rtx_PLUS (Pmode, basereg, delta)));
3554 }
3555 else
3556 {
3557 rtx basereg = gen_rtx_REG (Pmode, base);
3558 rtx delta = GEN_INT (disp);
3559 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3560
3561 emit_move_insn (tmpreg,
3562 gen_rtx_PLUS (Pmode, basereg,
3563 gen_rtx_HIGH (Pmode, delta)));
3564 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3565 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3566 }
3567
3568 if (DO_FRAME_NOTES && note)
3569 RTX_FRAME_RELATED_P (insn) = 1;
3570 }
3571
3572 HOST_WIDE_INT
3573 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3574 {
3575 int freg_saved = 0;
3576 int i, j;
3577
3578 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3579 be consistent with the rounding and size calculation done here.
3580 Change them at the same time. */
3581
3582 /* We do our own stack alignment. First, round the size of the
3583 stack locals up to a word boundary. */
3584 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3585
3586 /* Space for previous frame pointer + filler. If any frame is
3587 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3588 waste some space here for the sake of HP compatibility. The
3589 first slot is only used when the frame pointer is needed. */
3590 if (size || frame_pointer_needed)
3591 size += STARTING_FRAME_OFFSET;
3592
3593 /* If the current function calls __builtin_eh_return, then we need
3594 to allocate stack space for registers that will hold data for
3595 the exception handler. */
3596 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3597 {
3598 unsigned int i;
3599
3600 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3601 continue;
3602 size += i * UNITS_PER_WORD;
3603 }
3604
3605 /* Account for space used by the callee general register saves. */
3606 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3607 if (df_regs_ever_live_p (i))
3608 size += UNITS_PER_WORD;
3609
3610 /* Account for space used by the callee floating point register saves. */
3611 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3612 if (df_regs_ever_live_p (i)
3613 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3614 {
3615 freg_saved = 1;
3616
3617 /* We always save both halves of the FP register, so always
3618 increment the frame size by 8 bytes. */
3619 size += 8;
3620 }
3621
3622 /* If any of the floating registers are saved, account for the
3623 alignment needed for the floating point register save block. */
3624 if (freg_saved)
3625 {
3626 size = (size + 7) & ~7;
3627 if (fregs_live)
3628 *fregs_live = 1;
3629 }
3630
3631 /* The various ABIs include space for the outgoing parameters in the
3632 size of the current function's stack frame. We don't need to align
3633 for the outgoing arguments as their alignment is set by the final
3634 rounding for the frame as a whole. */
3635 size += crtl->outgoing_args_size;
3636
3637 /* Allocate space for the fixed frame marker. This space must be
3638 allocated for any function that makes calls or allocates
3639 stack space. */
3640 if (!current_function_is_leaf || size)
3641 size += TARGET_64BIT ? 48 : 32;
3642
3643 /* Finally, round to the preferred stack boundary. */
3644 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3645 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3646 }
3647
3648 /* Generate the assembly code for function entry. FILE is a stdio
3649 stream to output the code to. SIZE is an int: how many units of
3650 temporary storage to allocate.
3651
3652 Refer to the array `regs_ever_live' to determine which registers to
3653 save; `regs_ever_live[I]' is nonzero if register number I is ever
3654 used in the function. This function is responsible for knowing
3655 which registers should not be saved even if used. */
3656
3657 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3658 of memory. If any fpu reg is used in the function, we allocate
3659 such a block here, at the bottom of the frame, just in case it's needed.
3660
3661 If this function is a leaf procedure, then we may choose not
3662 to do a "save" insn. The decision about whether or not
3663 to do this is made in regclass.c. */
3664
3665 static void
3666 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3667 {
3668 /* The function's label and associated .PROC must never be
3669 separated and must be output *after* any profiling declarations
3670 to avoid changing spaces/subspaces within a procedure. */
3671 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3672 fputs ("\t.PROC\n", file);
3673
3674 /* hppa_expand_prologue does the dirty work now. We just need
3675 to output the assembler directives which denote the start
3676 of a function. */
3677 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3678 if (current_function_is_leaf)
3679 fputs (",NO_CALLS", file);
3680 else
3681 fputs (",CALLS", file);
3682 if (rp_saved)
3683 fputs (",SAVE_RP", file);
3684
3685 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3686 at the beginning of the frame and that it is used as the frame
3687 pointer for the frame. We do this because our current frame
3688 layout doesn't conform to that specified in the HP runtime
3689 documentation and we need a way to indicate to programs such as
3690 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3691 isn't used by HP compilers but is supported by the assembler.
3692 However, SAVE_SP is supposed to indicate that the previous stack
3693 pointer has been saved in the frame marker. */
3694 if (frame_pointer_needed)
3695 fputs (",SAVE_SP", file);
3696
3697 /* Pass on information about the number of callee register saves
3698 performed in the prologue.
3699
3700 The compiler is supposed to pass the highest register number
3701 saved, the assembler then has to adjust that number before
3702 entering it into the unwind descriptor (to account for any
3703 caller saved registers with lower register numbers than the
3704 first callee saved register). */
3705 if (gr_saved)
3706 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3707
3708 if (fr_saved)
3709 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3710
3711 fputs ("\n\t.ENTRY\n", file);
3712
3713 remove_useless_addtr_insns (0);
3714 }
3715
3716 void
3717 hppa_expand_prologue (void)
3718 {
3719 int merge_sp_adjust_with_store = 0;
3720 HOST_WIDE_INT size = get_frame_size ();
3721 HOST_WIDE_INT offset;
3722 int i;
3723 rtx insn, tmpreg;
3724
3725 gr_saved = 0;
3726 fr_saved = 0;
3727 save_fregs = 0;
3728
3729 /* Compute total size for frame pointer, filler, locals and rounding to
3730 the next word boundary. Similar code appears in compute_frame_size
3731 and must be changed in tandem with this code. */
3732 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3733 if (local_fsize || frame_pointer_needed)
3734 local_fsize += STARTING_FRAME_OFFSET;
3735
3736 actual_fsize = compute_frame_size (size, &save_fregs);
3737
3738 /* Compute a few things we will use often. */
3739 tmpreg = gen_rtx_REG (word_mode, 1);
3740
3741 /* Save RP first. The calling conventions manual states RP will
3742 always be stored into the caller's frame at sp - 20 or sp - 16
3743 depending on which ABI is in use. */
3744 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3745 {
3746 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3747 rp_saved = true;
3748 }
3749 else
3750 rp_saved = false;
3751
3752 /* Allocate the local frame and set up the frame pointer if needed. */
3753 if (actual_fsize != 0)
3754 {
3755 if (frame_pointer_needed)
3756 {
3757 /* Copy the old frame pointer temporarily into %r1. Set up the
3758 new stack pointer, then store away the saved old frame pointer
3759 into the stack at sp and at the same time update the stack
3760 pointer by actual_fsize bytes. Two versions, first
3761 handles small (<8k) frames. The second handles large (>=8k)
3762 frames. */
3763 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3764 if (DO_FRAME_NOTES)
3765 RTX_FRAME_RELATED_P (insn) = 1;
3766
3767 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3768 if (DO_FRAME_NOTES)
3769 RTX_FRAME_RELATED_P (insn) = 1;
3770
3771 if (VAL_14_BITS_P (actual_fsize))
3772 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3773 else
3774 {
3775 /* It is incorrect to store the saved frame pointer at *sp,
3776 then increment sp (writes beyond the current stack boundary).
3777
3778 So instead use stwm to store at *sp and post-increment the
3779 stack pointer as an atomic operation. Then increment sp to
3780 finish allocating the new frame. */
3781 HOST_WIDE_INT adjust1 = 8192 - 64;
3782 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3783
3784 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3785 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3786 adjust2, 1);
3787 }
3788
3789 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3790 we need to store the previous stack pointer (frame pointer)
3791 into the frame marker on targets that use the HP unwind
3792 library. This allows the HP unwind library to be used to
3793 unwind GCC frames. However, we are not fully compatible
3794 with the HP library because our frame layout differs from
3795 that specified in the HP runtime specification.
3796
3797 We don't want a frame note on this instruction as the frame
3798 marker moves during dynamic stack allocation.
3799
3800 This instruction also serves as a blockage to prevent
3801 register spills from being scheduled before the stack
3802 pointer is raised. This is necessary as we store
3803 registers using the frame pointer as a base register,
3804 and the frame pointer is set before sp is raised. */
3805 if (TARGET_HPUX_UNWIND_LIBRARY)
3806 {
3807 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3808 GEN_INT (TARGET_64BIT ? -8 : -4));
3809
3810 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3811 frame_pointer_rtx);
3812 }
3813 else
3814 emit_insn (gen_blockage ());
3815 }
3816 /* no frame pointer needed. */
3817 else
3818 {
3819 /* In some cases we can perform the first callee register save
3820 and allocating the stack frame at the same time. If so, just
3821 make a note of it and defer allocating the frame until saving
3822 the callee registers. */
3823 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3824 merge_sp_adjust_with_store = 1;
3825 /* Can not optimize. Adjust the stack frame by actual_fsize
3826 bytes. */
3827 else
3828 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3829 actual_fsize, 1);
3830 }
3831 }
3832
3833 /* Normal register save.
3834
3835 Do not save the frame pointer in the frame_pointer_needed case. It
3836 was done earlier. */
3837 if (frame_pointer_needed)
3838 {
3839 offset = local_fsize;
3840
3841 /* Saving the EH return data registers in the frame is the simplest
3842 way to get the frame unwind information emitted. We put them
3843 just before the general registers. */
3844 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3845 {
3846 unsigned int i, regno;
3847
3848 for (i = 0; ; ++i)
3849 {
3850 regno = EH_RETURN_DATA_REGNO (i);
3851 if (regno == INVALID_REGNUM)
3852 break;
3853
3854 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3855 offset += UNITS_PER_WORD;
3856 }
3857 }
3858
3859 for (i = 18; i >= 4; i--)
3860 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3861 {
3862 store_reg (i, offset, FRAME_POINTER_REGNUM);
3863 offset += UNITS_PER_WORD;
3864 gr_saved++;
3865 }
3866 /* Account for %r3 which is saved in a special place. */
3867 gr_saved++;
3868 }
3869 /* No frame pointer needed. */
3870 else
3871 {
3872 offset = local_fsize - actual_fsize;
3873
3874 /* Saving the EH return data registers in the frame is the simplest
3875 way to get the frame unwind information emitted. */
3876 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3877 {
3878 unsigned int i, regno;
3879
3880 for (i = 0; ; ++i)
3881 {
3882 regno = EH_RETURN_DATA_REGNO (i);
3883 if (regno == INVALID_REGNUM)
3884 break;
3885
3886 /* If merge_sp_adjust_with_store is nonzero, then we can
3887 optimize the first save. */
3888 if (merge_sp_adjust_with_store)
3889 {
3890 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3891 merge_sp_adjust_with_store = 0;
3892 }
3893 else
3894 store_reg (regno, offset, STACK_POINTER_REGNUM);
3895 offset += UNITS_PER_WORD;
3896 }
3897 }
3898
3899 for (i = 18; i >= 3; i--)
3900 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3901 {
3902 /* If merge_sp_adjust_with_store is nonzero, then we can
3903 optimize the first GR save. */
3904 if (merge_sp_adjust_with_store)
3905 {
3906 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3907 merge_sp_adjust_with_store = 0;
3908 }
3909 else
3910 store_reg (i, offset, STACK_POINTER_REGNUM);
3911 offset += UNITS_PER_WORD;
3912 gr_saved++;
3913 }
3914
3915 /* If we wanted to merge the SP adjustment with a GR save, but we never
3916 did any GR saves, then just emit the adjustment here. */
3917 if (merge_sp_adjust_with_store)
3918 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3919 actual_fsize, 1);
3920 }
3921
3922 /* The hppa calling conventions say that %r19, the pic offset
3923 register, is saved at sp - 32 (in this function's frame)
3924 when generating PIC code. FIXME: What is the correct thing
3925 to do for functions which make no calls and allocate no
3926 frame? Do we need to allocate a frame, or can we just omit
3927 the save? For now we'll just omit the save.
3928
3929 We don't want a note on this insn as the frame marker can
3930 move if there is a dynamic stack allocation. */
3931 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3932 {
3933 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3934
3935 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3936
3937 }
3938
3939 /* Align pointer properly (doubleword boundary). */
3940 offset = (offset + 7) & ~7;
3941
3942 /* Floating point register store. */
3943 if (save_fregs)
3944 {
3945 rtx base;
3946
3947 /* First get the frame or stack pointer to the start of the FP register
3948 save area. */
3949 if (frame_pointer_needed)
3950 {
3951 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3952 base = frame_pointer_rtx;
3953 }
3954 else
3955 {
3956 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3957 base = stack_pointer_rtx;
3958 }
3959
3960 /* Now actually save the FP registers. */
3961 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3962 {
3963 if (df_regs_ever_live_p (i)
3964 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3965 {
3966 rtx addr, insn, reg;
3967 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3968 reg = gen_rtx_REG (DFmode, i);
3969 insn = emit_move_insn (addr, reg);
3970 if (DO_FRAME_NOTES)
3971 {
3972 RTX_FRAME_RELATED_P (insn) = 1;
3973 if (TARGET_64BIT)
3974 {
3975 rtx mem = gen_rtx_MEM (DFmode,
3976 plus_constant (base, offset));
3977 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3978 gen_rtx_SET (VOIDmode, mem, reg));
3979 }
3980 else
3981 {
3982 rtx meml = gen_rtx_MEM (SFmode,
3983 plus_constant (base, offset));
3984 rtx memr = gen_rtx_MEM (SFmode,
3985 plus_constant (base, offset + 4));
3986 rtx regl = gen_rtx_REG (SFmode, i);
3987 rtx regr = gen_rtx_REG (SFmode, i + 1);
3988 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3989 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3990 rtvec vec;
3991
3992 RTX_FRAME_RELATED_P (setl) = 1;
3993 RTX_FRAME_RELATED_P (setr) = 1;
3994 vec = gen_rtvec (2, setl, setr);
3995 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3996 gen_rtx_SEQUENCE (VOIDmode, vec));
3997 }
3998 }
3999 offset += GET_MODE_SIZE (DFmode);
4000 fr_saved++;
4001 }
4002 }
4003 }
4004 }
4005
4006 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4007 Handle case where DISP > 8k by using the add_high_const patterns. */
4008
4009 static void
4010 load_reg (int reg, HOST_WIDE_INT disp, int base)
4011 {
4012 rtx dest = gen_rtx_REG (word_mode, reg);
4013 rtx basereg = gen_rtx_REG (Pmode, base);
4014 rtx src;
4015
4016 if (VAL_14_BITS_P (disp))
4017 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
4018 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4019 {
4020 rtx delta = GEN_INT (disp);
4021 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4022
4023 emit_move_insn (tmpreg, delta);
4024 if (TARGET_DISABLE_INDEXING)
4025 {
4026 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4027 src = gen_rtx_MEM (word_mode, tmpreg);
4028 }
4029 else
4030 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4031 }
4032 else
4033 {
4034 rtx delta = GEN_INT (disp);
4035 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4036 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4037
4038 emit_move_insn (tmpreg, high);
4039 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4040 }
4041
4042 emit_move_insn (dest, src);
4043 }
4044
4045 /* Update the total code bytes output to the text section. */
4046
4047 static void
4048 update_total_code_bytes (unsigned int nbytes)
4049 {
4050 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4051 && !IN_NAMED_SECTION_P (cfun->decl))
4052 {
4053 unsigned int old_total = total_code_bytes;
4054
4055 total_code_bytes += nbytes;
4056
4057 /* Be prepared to handle overflows. */
4058 if (old_total > total_code_bytes)
4059 total_code_bytes = UINT_MAX;
4060 }
4061 }
4062
4063 /* This function generates the assembly code for function exit.
4064 Args are as for output_function_prologue ().
4065
4066 The function epilogue should not depend on the current stack
4067 pointer! It should use the frame pointer only. This is mandatory
4068 because of alloca; we also take advantage of it to omit stack
4069 adjustments before returning. */
4070
4071 static void
4072 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4073 {
4074 rtx insn = get_last_insn ();
4075
4076 last_address = 0;
4077
4078 /* hppa_expand_epilogue does the dirty work now. We just need
4079 to output the assembler directives which denote the end
4080 of a function.
4081
4082 To make debuggers happy, emit a nop if the epilogue was completely
4083 eliminated due to a volatile call as the last insn in the
4084 current function. That way the return address (in %r2) will
4085 always point to a valid instruction in the current function. */
4086
4087 /* Get the last real insn. */
4088 if (GET_CODE (insn) == NOTE)
4089 insn = prev_real_insn (insn);
4090
4091 /* If it is a sequence, then look inside. */
4092 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4093 insn = XVECEXP (PATTERN (insn), 0, 0);
4094
4095 /* If insn is a CALL_INSN, then it must be a call to a volatile
4096 function (otherwise there would be epilogue insns). */
4097 if (insn && GET_CODE (insn) == CALL_INSN)
4098 {
4099 fputs ("\tnop\n", file);
4100 last_address += 4;
4101 }
4102
4103 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4104
4105 if (TARGET_SOM && TARGET_GAS)
4106 {
4107 /* We done with this subspace except possibly for some additional
4108 debug information. Forget that we are in this subspace to ensure
4109 that the next function is output in its own subspace. */
4110 in_section = NULL;
4111 cfun->machine->in_nsubspa = 2;
4112 }
4113
4114 if (INSN_ADDRESSES_SET_P ())
4115 {
4116 insn = get_last_nonnote_insn ();
4117 last_address += INSN_ADDRESSES (INSN_UID (insn));
4118 if (INSN_P (insn))
4119 last_address += insn_default_length (insn);
4120 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4121 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4122 }
4123 else
4124 last_address = UINT_MAX;
4125
4126 /* Finally, update the total number of code bytes output so far. */
4127 update_total_code_bytes (last_address);
4128 }
4129
4130 void
4131 hppa_expand_epilogue (void)
4132 {
4133 rtx tmpreg;
4134 HOST_WIDE_INT offset;
4135 HOST_WIDE_INT ret_off = 0;
4136 int i;
4137 int merge_sp_adjust_with_load = 0;
4138
4139 /* We will use this often. */
4140 tmpreg = gen_rtx_REG (word_mode, 1);
4141
4142 /* Try to restore RP early to avoid load/use interlocks when
4143 RP gets used in the return (bv) instruction. This appears to still
4144 be necessary even when we schedule the prologue and epilogue. */
4145 if (rp_saved)
4146 {
4147 ret_off = TARGET_64BIT ? -16 : -20;
4148 if (frame_pointer_needed)
4149 {
4150 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4151 ret_off = 0;
4152 }
4153 else
4154 {
4155 /* No frame pointer, and stack is smaller than 8k. */
4156 if (VAL_14_BITS_P (ret_off - actual_fsize))
4157 {
4158 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4159 ret_off = 0;
4160 }
4161 }
4162 }
4163
4164 /* General register restores. */
4165 if (frame_pointer_needed)
4166 {
4167 offset = local_fsize;
4168
4169 /* If the current function calls __builtin_eh_return, then we need
4170 to restore the saved EH data registers. */
4171 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4172 {
4173 unsigned int i, regno;
4174
4175 for (i = 0; ; ++i)
4176 {
4177 regno = EH_RETURN_DATA_REGNO (i);
4178 if (regno == INVALID_REGNUM)
4179 break;
4180
4181 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4182 offset += UNITS_PER_WORD;
4183 }
4184 }
4185
4186 for (i = 18; i >= 4; i--)
4187 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4188 {
4189 load_reg (i, offset, FRAME_POINTER_REGNUM);
4190 offset += UNITS_PER_WORD;
4191 }
4192 }
4193 else
4194 {
4195 offset = local_fsize - actual_fsize;
4196
4197 /* If the current function calls __builtin_eh_return, then we need
4198 to restore the saved EH data registers. */
4199 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4200 {
4201 unsigned int i, regno;
4202
4203 for (i = 0; ; ++i)
4204 {
4205 regno = EH_RETURN_DATA_REGNO (i);
4206 if (regno == INVALID_REGNUM)
4207 break;
4208
4209 /* Only for the first load.
4210 merge_sp_adjust_with_load holds the register load
4211 with which we will merge the sp adjustment. */
4212 if (merge_sp_adjust_with_load == 0
4213 && local_fsize == 0
4214 && VAL_14_BITS_P (-actual_fsize))
4215 merge_sp_adjust_with_load = regno;
4216 else
4217 load_reg (regno, offset, STACK_POINTER_REGNUM);
4218 offset += UNITS_PER_WORD;
4219 }
4220 }
4221
4222 for (i = 18; i >= 3; i--)
4223 {
4224 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4225 {
4226 /* Only for the first load.
4227 merge_sp_adjust_with_load holds the register load
4228 with which we will merge the sp adjustment. */
4229 if (merge_sp_adjust_with_load == 0
4230 && local_fsize == 0
4231 && VAL_14_BITS_P (-actual_fsize))
4232 merge_sp_adjust_with_load = i;
4233 else
4234 load_reg (i, offset, STACK_POINTER_REGNUM);
4235 offset += UNITS_PER_WORD;
4236 }
4237 }
4238 }
4239
4240 /* Align pointer properly (doubleword boundary). */
4241 offset = (offset + 7) & ~7;
4242
4243 /* FP register restores. */
4244 if (save_fregs)
4245 {
4246 /* Adjust the register to index off of. */
4247 if (frame_pointer_needed)
4248 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4249 else
4250 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4251
4252 /* Actually do the restores now. */
4253 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4254 if (df_regs_ever_live_p (i)
4255 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4256 {
4257 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4258 rtx dest = gen_rtx_REG (DFmode, i);
4259 emit_move_insn (dest, src);
4260 }
4261 }
4262
4263 /* Emit a blockage insn here to keep these insns from being moved to
4264 an earlier spot in the epilogue, or into the main instruction stream.
4265
4266 This is necessary as we must not cut the stack back before all the
4267 restores are finished. */
4268 emit_insn (gen_blockage ());
4269
4270 /* Reset stack pointer (and possibly frame pointer). The stack
4271 pointer is initially set to fp + 64 to avoid a race condition. */
4272 if (frame_pointer_needed)
4273 {
4274 rtx delta = GEN_INT (-64);
4275
4276 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4277 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4278 }
4279 /* If we were deferring a callee register restore, do it now. */
4280 else if (merge_sp_adjust_with_load)
4281 {
4282 rtx delta = GEN_INT (-actual_fsize);
4283 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4284
4285 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4286 }
4287 else if (actual_fsize != 0)
4288 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4289 - actual_fsize, 0);
4290
4291 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4292 frame greater than 8k), do so now. */
4293 if (ret_off != 0)
4294 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4295
4296 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4297 {
4298 rtx sa = EH_RETURN_STACKADJ_RTX;
4299
4300 emit_insn (gen_blockage ());
4301 emit_insn (TARGET_64BIT
4302 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4303 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4304 }
4305 }
4306
4307 rtx
4308 hppa_pic_save_rtx (void)
4309 {
4310 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4311 }
4312
4313 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4314 #define NO_DEFERRED_PROFILE_COUNTERS 0
4315 #endif
4316
4317
4318 /* Vector of funcdef numbers. */
4319 static VEC(int,heap) *funcdef_nos;
4320
4321 /* Output deferred profile counters. */
4322 static void
4323 output_deferred_profile_counters (void)
4324 {
4325 unsigned int i;
4326 int align, n;
4327
4328 if (VEC_empty (int, funcdef_nos))
4329 return;
4330
4331 switch_to_section (data_section);
4332 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4333 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4334
4335 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4336 {
4337 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4338 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4339 }
4340
4341 VEC_free (int, heap, funcdef_nos);
4342 }
4343
4344 void
4345 hppa_profile_hook (int label_no)
4346 {
4347 /* We use SImode for the address of the function in both 32 and
4348 64-bit code to avoid having to provide DImode versions of the
4349 lcla2 and load_offset_label_address insn patterns. */
4350 rtx reg = gen_reg_rtx (SImode);
4351 rtx label_rtx = gen_label_rtx ();
4352 rtx begin_label_rtx, call_insn;
4353 char begin_label_name[16];
4354
4355 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4356 label_no);
4357 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4358
4359 if (TARGET_64BIT)
4360 emit_move_insn (arg_pointer_rtx,
4361 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4362 GEN_INT (64)));
4363
4364 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4365
4366 /* The address of the function is loaded into %r25 with an instruction-
4367 relative sequence that avoids the use of relocations. The sequence
4368 is split so that the load_offset_label_address instruction can
4369 occupy the delay slot of the call to _mcount. */
4370 if (TARGET_PA_20)
4371 emit_insn (gen_lcla2 (reg, label_rtx));
4372 else
4373 emit_insn (gen_lcla1 (reg, label_rtx));
4374
4375 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4376 reg, begin_label_rtx, label_rtx));
4377
4378 #if !NO_DEFERRED_PROFILE_COUNTERS
4379 {
4380 rtx count_label_rtx, addr, r24;
4381 char count_label_name[16];
4382
4383 VEC_safe_push (int, heap, funcdef_nos, label_no);
4384 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4385 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4386
4387 addr = force_reg (Pmode, count_label_rtx);
4388 r24 = gen_rtx_REG (Pmode, 24);
4389 emit_move_insn (r24, addr);
4390
4391 call_insn =
4392 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4393 gen_rtx_SYMBOL_REF (Pmode,
4394 "_mcount")),
4395 GEN_INT (TARGET_64BIT ? 24 : 12)));
4396
4397 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4398 }
4399 #else
4400
4401 call_insn =
4402 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4403 gen_rtx_SYMBOL_REF (Pmode,
4404 "_mcount")),
4405 GEN_INT (TARGET_64BIT ? 16 : 8)));
4406
4407 #endif
4408
4409 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4410 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4411
4412 /* Indicate the _mcount call cannot throw, nor will it execute a
4413 non-local goto. */
4414 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4415 }
4416
4417 /* Fetch the return address for the frame COUNT steps up from
4418 the current frame, after the prologue. FRAMEADDR is the
4419 frame pointer of the COUNT frame.
4420
4421 We want to ignore any export stub remnants here. To handle this,
4422 we examine the code at the return address, and if it is an export
4423 stub, we return a memory rtx for the stub return address stored
4424 at frame-24.
4425
4426 The value returned is used in two different ways:
4427
4428 1. To find a function's caller.
4429
4430 2. To change the return address for a function.
4431
4432 This function handles most instances of case 1; however, it will
4433 fail if there are two levels of stubs to execute on the return
4434 path. The only way I believe that can happen is if the return value
4435 needs a parameter relocation, which never happens for C code.
4436
4437 This function handles most instances of case 2; however, it will
4438 fail if we did not originally have stub code on the return path
4439 but will need stub code on the new return path. This can happen if
4440 the caller & callee are both in the main program, but the new
4441 return location is in a shared library. */
4442
4443 rtx
4444 return_addr_rtx (int count, rtx frameaddr)
4445 {
4446 rtx label;
4447 rtx rp;
4448 rtx saved_rp;
4449 rtx ins;
4450
4451 /* Instruction stream at the normal return address for the export stub:
4452
4453 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4454 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4455 0x00011820 | stub+16: mtsp r1,sr0
4456 0xe0400002 | stub+20: be,n 0(sr0,rp)
4457
4458 0xe0400002 must be specified as -532676606 so that it won't be
4459 rejected as an invalid immediate operand on 64-bit hosts. */
4460
4461 HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4462 int i;
4463
4464 if (count != 0)
4465 return NULL_RTX;
4466
4467 rp = get_hard_reg_initial_val (Pmode, 2);
4468
4469 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4470 return rp;
4471
4472 /* If there is no export stub then just use the value saved from
4473 the return pointer register. */
4474
4475 saved_rp = gen_reg_rtx (Pmode);
4476 emit_move_insn (saved_rp, rp);
4477
4478 /* Get pointer to the instruction stream. We have to mask out the
4479 privilege level from the two low order bits of the return address
4480 pointer here so that ins will point to the start of the first
4481 instruction that would have been executed if we returned. */
4482 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4483 label = gen_label_rtx ();
4484
4485 /* Check the instruction stream at the normal return address for the
4486 export stub. If it is an export stub, than our return address is
4487 really in -24[frameaddr]. */
4488
4489 for (i = 0; i < 3; i++)
4490 {
4491 rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4492 rtx op1 = GEN_INT (insns[i]);
4493 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4494 }
4495
4496 /* Here we know that our return address points to an export
4497 stub. We don't want to return the address of the export stub,
4498 but rather the return address of the export stub. That return
4499 address is stored at -24[frameaddr]. */
4500
4501 emit_move_insn (saved_rp,
4502 gen_rtx_MEM (Pmode,
4503 memory_address (Pmode,
4504 plus_constant (frameaddr,
4505 -24))));
4506
4507 emit_label (label);
4508
4509 return saved_rp;
4510 }
4511
4512 void
4513 emit_bcond_fp (rtx operands[])
4514 {
4515 enum rtx_code code = GET_CODE (operands[0]);
4516 rtx operand0 = operands[1];
4517 rtx operand1 = operands[2];
4518 rtx label = operands[3];
4519
4520 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4521 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4522
4523 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4524 gen_rtx_IF_THEN_ELSE (VOIDmode,
4525 gen_rtx_fmt_ee (NE,
4526 VOIDmode,
4527 gen_rtx_REG (CCFPmode, 0),
4528 const0_rtx),
4529 gen_rtx_LABEL_REF (VOIDmode, label),
4530 pc_rtx)));
4531
4532 }
4533
4534 /* Adjust the cost of a scheduling dependency. Return the new cost of
4535 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4536
4537 static int
4538 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4539 {
4540 enum attr_type attr_type;
4541
4542 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4543 true dependencies as they are described with bypasses now. */
4544 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4545 return cost;
4546
4547 if (! recog_memoized (insn))
4548 return 0;
4549
4550 attr_type = get_attr_type (insn);
4551
4552 switch (REG_NOTE_KIND (link))
4553 {
4554 case REG_DEP_ANTI:
4555 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4556 cycles later. */
4557
4558 if (attr_type == TYPE_FPLOAD)
4559 {
4560 rtx pat = PATTERN (insn);
4561 rtx dep_pat = PATTERN (dep_insn);
4562 if (GET_CODE (pat) == PARALLEL)
4563 {
4564 /* This happens for the fldXs,mb patterns. */
4565 pat = XVECEXP (pat, 0, 0);
4566 }
4567 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4568 /* If this happens, we have to extend this to schedule
4569 optimally. Return 0 for now. */
4570 return 0;
4571
4572 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4573 {
4574 if (! recog_memoized (dep_insn))
4575 return 0;
4576 switch (get_attr_type (dep_insn))
4577 {
4578 case TYPE_FPALU:
4579 case TYPE_FPMULSGL:
4580 case TYPE_FPMULDBL:
4581 case TYPE_FPDIVSGL:
4582 case TYPE_FPDIVDBL:
4583 case TYPE_FPSQRTSGL:
4584 case TYPE_FPSQRTDBL:
4585 /* A fpload can't be issued until one cycle before a
4586 preceding arithmetic operation has finished if
4587 the target of the fpload is any of the sources
4588 (or destination) of the arithmetic operation. */
4589 return insn_default_latency (dep_insn) - 1;
4590
4591 default:
4592 return 0;
4593 }
4594 }
4595 }
4596 else if (attr_type == TYPE_FPALU)
4597 {
4598 rtx pat = PATTERN (insn);
4599 rtx dep_pat = PATTERN (dep_insn);
4600 if (GET_CODE (pat) == PARALLEL)
4601 {
4602 /* This happens for the fldXs,mb patterns. */
4603 pat = XVECEXP (pat, 0, 0);
4604 }
4605 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4606 /* If this happens, we have to extend this to schedule
4607 optimally. Return 0 for now. */
4608 return 0;
4609
4610 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4611 {
4612 if (! recog_memoized (dep_insn))
4613 return 0;
4614 switch (get_attr_type (dep_insn))
4615 {
4616 case TYPE_FPDIVSGL:
4617 case TYPE_FPDIVDBL:
4618 case TYPE_FPSQRTSGL:
4619 case TYPE_FPSQRTDBL:
4620 /* An ALU flop can't be issued until two cycles before a
4621 preceding divide or sqrt operation has finished if
4622 the target of the ALU flop is any of the sources
4623 (or destination) of the divide or sqrt operation. */
4624 return insn_default_latency (dep_insn) - 2;
4625
4626 default:
4627 return 0;
4628 }
4629 }
4630 }
4631
4632 /* For other anti dependencies, the cost is 0. */
4633 return 0;
4634
4635 case REG_DEP_OUTPUT:
4636 /* Output dependency; DEP_INSN writes a register that INSN writes some
4637 cycles later. */
4638 if (attr_type == TYPE_FPLOAD)
4639 {
4640 rtx pat = PATTERN (insn);
4641 rtx dep_pat = PATTERN (dep_insn);
4642 if (GET_CODE (pat) == PARALLEL)
4643 {
4644 /* This happens for the fldXs,mb patterns. */
4645 pat = XVECEXP (pat, 0, 0);
4646 }
4647 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4648 /* If this happens, we have to extend this to schedule
4649 optimally. Return 0 for now. */
4650 return 0;
4651
4652 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4653 {
4654 if (! recog_memoized (dep_insn))
4655 return 0;
4656 switch (get_attr_type (dep_insn))
4657 {
4658 case TYPE_FPALU:
4659 case TYPE_FPMULSGL:
4660 case TYPE_FPMULDBL:
4661 case TYPE_FPDIVSGL:
4662 case TYPE_FPDIVDBL:
4663 case TYPE_FPSQRTSGL:
4664 case TYPE_FPSQRTDBL:
4665 /* A fpload can't be issued until one cycle before a
4666 preceding arithmetic operation has finished if
4667 the target of the fpload is the destination of the
4668 arithmetic operation.
4669
4670 Exception: For PA7100LC, PA7200 and PA7300, the cost
4671 is 3 cycles, unless they bundle together. We also
4672 pay the penalty if the second insn is a fpload. */
4673 return insn_default_latency (dep_insn) - 1;
4674
4675 default:
4676 return 0;
4677 }
4678 }
4679 }
4680 else if (attr_type == TYPE_FPALU)
4681 {
4682 rtx pat = PATTERN (insn);
4683 rtx dep_pat = PATTERN (dep_insn);
4684 if (GET_CODE (pat) == PARALLEL)
4685 {
4686 /* This happens for the fldXs,mb patterns. */
4687 pat = XVECEXP (pat, 0, 0);
4688 }
4689 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4690 /* If this happens, we have to extend this to schedule
4691 optimally. Return 0 for now. */
4692 return 0;
4693
4694 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4695 {
4696 if (! recog_memoized (dep_insn))
4697 return 0;
4698 switch (get_attr_type (dep_insn))
4699 {
4700 case TYPE_FPDIVSGL:
4701 case TYPE_FPDIVDBL:
4702 case TYPE_FPSQRTSGL:
4703 case TYPE_FPSQRTDBL:
4704 /* An ALU flop can't be issued until two cycles before a
4705 preceding divide or sqrt operation has finished if
4706 the target of the ALU flop is also the target of
4707 the divide or sqrt operation. */
4708 return insn_default_latency (dep_insn) - 2;
4709
4710 default:
4711 return 0;
4712 }
4713 }
4714 }
4715
4716 /* For other output dependencies, the cost is 0. */
4717 return 0;
4718
4719 default:
4720 gcc_unreachable ();
4721 }
4722 }
4723
4724 /* Adjust scheduling priorities. We use this to try and keep addil
4725 and the next use of %r1 close together. */
4726 static int
4727 pa_adjust_priority (rtx insn, int priority)
4728 {
4729 rtx set = single_set (insn);
4730 rtx src, dest;
4731 if (set)
4732 {
4733 src = SET_SRC (set);
4734 dest = SET_DEST (set);
4735 if (GET_CODE (src) == LO_SUM
4736 && symbolic_operand (XEXP (src, 1), VOIDmode)
4737 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4738 priority >>= 3;
4739
4740 else if (GET_CODE (src) == MEM
4741 && GET_CODE (XEXP (src, 0)) == LO_SUM
4742 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4743 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4744 priority >>= 1;
4745
4746 else if (GET_CODE (dest) == MEM
4747 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4748 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4749 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4750 priority >>= 3;
4751 }
4752 return priority;
4753 }
4754
4755 /* The 700 can only issue a single insn at a time.
4756 The 7XXX processors can issue two insns at a time.
4757 The 8000 can issue 4 insns at a time. */
4758 static int
4759 pa_issue_rate (void)
4760 {
4761 switch (pa_cpu)
4762 {
4763 case PROCESSOR_700: return 1;
4764 case PROCESSOR_7100: return 2;
4765 case PROCESSOR_7100LC: return 2;
4766 case PROCESSOR_7200: return 2;
4767 case PROCESSOR_7300: return 2;
4768 case PROCESSOR_8000: return 4;
4769
4770 default:
4771 gcc_unreachable ();
4772 }
4773 }
4774
4775
4776
4777 /* Return any length adjustment needed by INSN which already has its length
4778 computed as LENGTH. Return zero if no adjustment is necessary.
4779
4780 For the PA: function calls, millicode calls, and backwards short
4781 conditional branches with unfilled delay slots need an adjustment by +1
4782 (to account for the NOP which will be inserted into the instruction stream).
4783
4784 Also compute the length of an inline block move here as it is too
4785 complicated to express as a length attribute in pa.md. */
4786 int
4787 pa_adjust_insn_length (rtx insn, int length)
4788 {
4789 rtx pat = PATTERN (insn);
4790
4791 /* Jumps inside switch tables which have unfilled delay slots need
4792 adjustment. */
4793 if (GET_CODE (insn) == JUMP_INSN
4794 && GET_CODE (pat) == PARALLEL
4795 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4796 return 4;
4797 /* Millicode insn with an unfilled delay slot. */
4798 else if (GET_CODE (insn) == INSN
4799 && GET_CODE (pat) != SEQUENCE
4800 && GET_CODE (pat) != USE
4801 && GET_CODE (pat) != CLOBBER
4802 && get_attr_type (insn) == TYPE_MILLI)
4803 return 4;
4804 /* Block move pattern. */
4805 else if (GET_CODE (insn) == INSN
4806 && GET_CODE (pat) == PARALLEL
4807 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4808 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4809 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4810 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4811 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4812 return compute_movmem_length (insn) - 4;
4813 /* Block clear pattern. */
4814 else if (GET_CODE (insn) == INSN
4815 && GET_CODE (pat) == PARALLEL
4816 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4817 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4818 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4819 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4820 return compute_clrmem_length (insn) - 4;
4821 /* Conditional branch with an unfilled delay slot. */
4822 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4823 {
4824 /* Adjust a short backwards conditional with an unfilled delay slot. */
4825 if (GET_CODE (pat) == SET
4826 && length == 4
4827 && JUMP_LABEL (insn) != NULL_RTX
4828 && ! forward_branch_p (insn))
4829 return 4;
4830 else if (GET_CODE (pat) == PARALLEL
4831 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4832 && length == 4)
4833 return 4;
4834 /* Adjust dbra insn with short backwards conditional branch with
4835 unfilled delay slot -- only for case where counter is in a
4836 general register register. */
4837 else if (GET_CODE (pat) == PARALLEL
4838 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4839 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4840 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4841 && length == 4
4842 && ! forward_branch_p (insn))
4843 return 4;
4844 else
4845 return 0;
4846 }
4847 return 0;
4848 }
4849
4850 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
4851
4852 static bool
4853 pa_print_operand_punct_valid_p (unsigned char code)
4854 {
4855 if (code == '@'
4856 || code == '#'
4857 || code == '*'
4858 || code == '^')
4859 return true;
4860
4861 return false;
4862 }
4863
4864 /* Print operand X (an rtx) in assembler syntax to file FILE.
4865 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4866 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4867
4868 void
4869 print_operand (FILE *file, rtx x, int code)
4870 {
4871 switch (code)
4872 {
4873 case '#':
4874 /* Output a 'nop' if there's nothing for the delay slot. */
4875 if (dbr_sequence_length () == 0)
4876 fputs ("\n\tnop", file);
4877 return;
4878 case '*':
4879 /* Output a nullification completer if there's nothing for the */
4880 /* delay slot or nullification is requested. */
4881 if (dbr_sequence_length () == 0 ||
4882 (final_sequence &&
4883 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4884 fputs (",n", file);
4885 return;
4886 case 'R':
4887 /* Print out the second register name of a register pair.
4888 I.e., R (6) => 7. */
4889 fputs (reg_names[REGNO (x) + 1], file);
4890 return;
4891 case 'r':
4892 /* A register or zero. */
4893 if (x == const0_rtx
4894 || (x == CONST0_RTX (DFmode))
4895 || (x == CONST0_RTX (SFmode)))
4896 {
4897 fputs ("%r0", file);
4898 return;
4899 }
4900 else
4901 break;
4902 case 'f':
4903 /* A register or zero (floating point). */
4904 if (x == const0_rtx
4905 || (x == CONST0_RTX (DFmode))
4906 || (x == CONST0_RTX (SFmode)))
4907 {
4908 fputs ("%fr0", file);
4909 return;
4910 }
4911 else
4912 break;
4913 case 'A':
4914 {
4915 rtx xoperands[2];
4916
4917 xoperands[0] = XEXP (XEXP (x, 0), 0);
4918 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4919 output_global_address (file, xoperands[1], 0);
4920 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4921 return;
4922 }
4923
4924 case 'C': /* Plain (C)ondition */
4925 case 'X':
4926 switch (GET_CODE (x))
4927 {
4928 case EQ:
4929 fputs ("=", file); break;
4930 case NE:
4931 fputs ("<>", file); break;
4932 case GT:
4933 fputs (">", file); break;
4934 case GE:
4935 fputs (">=", file); break;
4936 case GEU:
4937 fputs (">>=", file); break;
4938 case GTU:
4939 fputs (">>", file); break;
4940 case LT:
4941 fputs ("<", file); break;
4942 case LE:
4943 fputs ("<=", file); break;
4944 case LEU:
4945 fputs ("<<=", file); break;
4946 case LTU:
4947 fputs ("<<", file); break;
4948 default:
4949 gcc_unreachable ();
4950 }
4951 return;
4952 case 'N': /* Condition, (N)egated */
4953 switch (GET_CODE (x))
4954 {
4955 case EQ:
4956 fputs ("<>", file); break;
4957 case NE:
4958 fputs ("=", file); break;
4959 case GT:
4960 fputs ("<=", file); break;
4961 case GE:
4962 fputs ("<", file); break;
4963 case GEU:
4964 fputs ("<<", file); break;
4965 case GTU:
4966 fputs ("<<=", file); break;
4967 case LT:
4968 fputs (">=", file); break;
4969 case LE:
4970 fputs (">", file); break;
4971 case LEU:
4972 fputs (">>", file); break;
4973 case LTU:
4974 fputs (">>=", file); break;
4975 default:
4976 gcc_unreachable ();
4977 }
4978 return;
4979 /* For floating point comparisons. Note that the output
4980 predicates are the complement of the desired mode. The
4981 conditions for GT, GE, LT, LE and LTGT cause an invalid
4982 operation exception if the result is unordered and this
4983 exception is enabled in the floating-point status register. */
4984 case 'Y':
4985 switch (GET_CODE (x))
4986 {
4987 case EQ:
4988 fputs ("!=", file); break;
4989 case NE:
4990 fputs ("=", file); break;
4991 case GT:
4992 fputs ("!>", file); break;
4993 case GE:
4994 fputs ("!>=", file); break;
4995 case LT:
4996 fputs ("!<", file); break;
4997 case LE:
4998 fputs ("!<=", file); break;
4999 case LTGT:
5000 fputs ("!<>", file); break;
5001 case UNLE:
5002 fputs ("!?<=", file); break;
5003 case UNLT:
5004 fputs ("!?<", file); break;
5005 case UNGE:
5006 fputs ("!?>=", file); break;
5007 case UNGT:
5008 fputs ("!?>", file); break;
5009 case UNEQ:
5010 fputs ("!?=", file); break;
5011 case UNORDERED:
5012 fputs ("!?", file); break;
5013 case ORDERED:
5014 fputs ("?", file); break;
5015 default:
5016 gcc_unreachable ();
5017 }
5018 return;
5019 case 'S': /* Condition, operands are (S)wapped. */
5020 switch (GET_CODE (x))
5021 {
5022 case EQ:
5023 fputs ("=", file); break;
5024 case NE:
5025 fputs ("<>", file); break;
5026 case GT:
5027 fputs ("<", file); break;
5028 case GE:
5029 fputs ("<=", file); break;
5030 case GEU:
5031 fputs ("<<=", file); break;
5032 case GTU:
5033 fputs ("<<", file); break;
5034 case LT:
5035 fputs (">", file); break;
5036 case LE:
5037 fputs (">=", file); break;
5038 case LEU:
5039 fputs (">>=", file); break;
5040 case LTU:
5041 fputs (">>", file); break;
5042 default:
5043 gcc_unreachable ();
5044 }
5045 return;
5046 case 'B': /* Condition, (B)oth swapped and negate. */
5047 switch (GET_CODE (x))
5048 {
5049 case EQ:
5050 fputs ("<>", file); break;
5051 case NE:
5052 fputs ("=", file); break;
5053 case GT:
5054 fputs (">=", file); break;
5055 case GE:
5056 fputs (">", file); break;
5057 case GEU:
5058 fputs (">>", file); break;
5059 case GTU:
5060 fputs (">>=", file); break;
5061 case LT:
5062 fputs ("<=", file); break;
5063 case LE:
5064 fputs ("<", file); break;
5065 case LEU:
5066 fputs ("<<", file); break;
5067 case LTU:
5068 fputs ("<<=", file); break;
5069 default:
5070 gcc_unreachable ();
5071 }
5072 return;
5073 case 'k':
5074 gcc_assert (GET_CODE (x) == CONST_INT);
5075 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5076 return;
5077 case 'Q':
5078 gcc_assert (GET_CODE (x) == CONST_INT);
5079 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5080 return;
5081 case 'L':
5082 gcc_assert (GET_CODE (x) == CONST_INT);
5083 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5084 return;
5085 case 'O':
5086 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5087 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5088 return;
5089 case 'p':
5090 gcc_assert (GET_CODE (x) == CONST_INT);
5091 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5092 return;
5093 case 'P':
5094 gcc_assert (GET_CODE (x) == CONST_INT);
5095 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5096 return;
5097 case 'I':
5098 if (GET_CODE (x) == CONST_INT)
5099 fputs ("i", file);
5100 return;
5101 case 'M':
5102 case 'F':
5103 switch (GET_CODE (XEXP (x, 0)))
5104 {
5105 case PRE_DEC:
5106 case PRE_INC:
5107 if (ASSEMBLER_DIALECT == 0)
5108 fputs ("s,mb", file);
5109 else
5110 fputs (",mb", file);
5111 break;
5112 case POST_DEC:
5113 case POST_INC:
5114 if (ASSEMBLER_DIALECT == 0)
5115 fputs ("s,ma", file);
5116 else
5117 fputs (",ma", file);
5118 break;
5119 case PLUS:
5120 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5121 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5122 {
5123 if (ASSEMBLER_DIALECT == 0)
5124 fputs ("x", file);
5125 }
5126 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5127 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5128 {
5129 if (ASSEMBLER_DIALECT == 0)
5130 fputs ("x,s", file);
5131 else
5132 fputs (",s", file);
5133 }
5134 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5135 fputs ("s", file);
5136 break;
5137 default:
5138 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5139 fputs ("s", file);
5140 break;
5141 }
5142 return;
5143 case 'G':
5144 output_global_address (file, x, 0);
5145 return;
5146 case 'H':
5147 output_global_address (file, x, 1);
5148 return;
5149 case 0: /* Don't do anything special */
5150 break;
5151 case 'Z':
5152 {
5153 unsigned op[3];
5154 compute_zdepwi_operands (INTVAL (x), op);
5155 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5156 return;
5157 }
5158 case 'z':
5159 {
5160 unsigned op[3];
5161 compute_zdepdi_operands (INTVAL (x), op);
5162 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5163 return;
5164 }
5165 case 'c':
5166 /* We can get here from a .vtable_inherit due to our
5167 CONSTANT_ADDRESS_P rejecting perfectly good constant
5168 addresses. */
5169 break;
5170 default:
5171 gcc_unreachable ();
5172 }
5173 if (GET_CODE (x) == REG)
5174 {
5175 fputs (reg_names [REGNO (x)], file);
5176 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5177 {
5178 fputs ("R", file);
5179 return;
5180 }
5181 if (FP_REG_P (x)
5182 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5183 && (REGNO (x) & 1) == 0)
5184 fputs ("L", file);
5185 }
5186 else if (GET_CODE (x) == MEM)
5187 {
5188 int size = GET_MODE_SIZE (GET_MODE (x));
5189 rtx base = NULL_RTX;
5190 switch (GET_CODE (XEXP (x, 0)))
5191 {
5192 case PRE_DEC:
5193 case POST_DEC:
5194 base = XEXP (XEXP (x, 0), 0);
5195 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5196 break;
5197 case PRE_INC:
5198 case POST_INC:
5199 base = XEXP (XEXP (x, 0), 0);
5200 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5201 break;
5202 case PLUS:
5203 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5204 fprintf (file, "%s(%s)",
5205 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5206 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5207 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5208 fprintf (file, "%s(%s)",
5209 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5210 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5211 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5212 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5213 {
5214 /* Because the REG_POINTER flag can get lost during reload,
5215 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5216 index and base registers in the combined move patterns. */
5217 rtx base = XEXP (XEXP (x, 0), 1);
5218 rtx index = XEXP (XEXP (x, 0), 0);
5219
5220 fprintf (file, "%s(%s)",
5221 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5222 }
5223 else
5224 output_address (XEXP (x, 0));
5225 break;
5226 default:
5227 output_address (XEXP (x, 0));
5228 break;
5229 }
5230 }
5231 else
5232 output_addr_const (file, x);
5233 }
5234
5235 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5236
5237 void
5238 output_global_address (FILE *file, rtx x, int round_constant)
5239 {
5240
5241 /* Imagine (high (const (plus ...))). */
5242 if (GET_CODE (x) == HIGH)
5243 x = XEXP (x, 0);
5244
5245 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5246 output_addr_const (file, x);
5247 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5248 {
5249 output_addr_const (file, x);
5250 fputs ("-$global$", file);
5251 }
5252 else if (GET_CODE (x) == CONST)
5253 {
5254 const char *sep = "";
5255 int offset = 0; /* assembler wants -$global$ at end */
5256 rtx base = NULL_RTX;
5257
5258 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5259 {
5260 case SYMBOL_REF:
5261 base = XEXP (XEXP (x, 0), 0);
5262 output_addr_const (file, base);
5263 break;
5264 case CONST_INT:
5265 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5266 break;
5267 default:
5268 gcc_unreachable ();
5269 }
5270
5271 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5272 {
5273 case SYMBOL_REF:
5274 base = XEXP (XEXP (x, 0), 1);
5275 output_addr_const (file, base);
5276 break;
5277 case CONST_INT:
5278 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5279 break;
5280 default:
5281 gcc_unreachable ();
5282 }
5283
5284 /* How bogus. The compiler is apparently responsible for
5285 rounding the constant if it uses an LR field selector.
5286
5287 The linker and/or assembler seem a better place since
5288 they have to do this kind of thing already.
5289
5290 If we fail to do this, HP's optimizing linker may eliminate
5291 an addil, but not update the ldw/stw/ldo instruction that
5292 uses the result of the addil. */
5293 if (round_constant)
5294 offset = ((offset + 0x1000) & ~0x1fff);
5295
5296 switch (GET_CODE (XEXP (x, 0)))
5297 {
5298 case PLUS:
5299 if (offset < 0)
5300 {
5301 offset = -offset;
5302 sep = "-";
5303 }
5304 else
5305 sep = "+";
5306 break;
5307
5308 case MINUS:
5309 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5310 sep = "-";
5311 break;
5312
5313 default:
5314 gcc_unreachable ();
5315 }
5316
5317 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5318 fputs ("-$global$", file);
5319 if (offset)
5320 fprintf (file, "%s%d", sep, offset);
5321 }
5322 else
5323 output_addr_const (file, x);
5324 }
5325
5326 /* Output boilerplate text to appear at the beginning of the file.
5327 There are several possible versions. */
5328 #define aputs(x) fputs(x, asm_out_file)
5329 static inline void
5330 pa_file_start_level (void)
5331 {
5332 if (TARGET_64BIT)
5333 aputs ("\t.LEVEL 2.0w\n");
5334 else if (TARGET_PA_20)
5335 aputs ("\t.LEVEL 2.0\n");
5336 else if (TARGET_PA_11)
5337 aputs ("\t.LEVEL 1.1\n");
5338 else
5339 aputs ("\t.LEVEL 1.0\n");
5340 }
5341
5342 static inline void
5343 pa_file_start_space (int sortspace)
5344 {
5345 aputs ("\t.SPACE $PRIVATE$");
5346 if (sortspace)
5347 aputs (",SORT=16");
5348 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5349 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5350 "\n\t.SPACE $TEXT$");
5351 if (sortspace)
5352 aputs (",SORT=8");
5353 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5354 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5355 }
5356
5357 static inline void
5358 pa_file_start_file (int want_version)
5359 {
5360 if (write_symbols != NO_DEBUG)
5361 {
5362 output_file_directive (asm_out_file, main_input_filename);
5363 if (want_version)
5364 aputs ("\t.version\t\"01.01\"\n");
5365 }
5366 }
5367
5368 static inline void
5369 pa_file_start_mcount (const char *aswhat)
5370 {
5371 if (profile_flag)
5372 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5373 }
5374
5375 static void
5376 pa_elf_file_start (void)
5377 {
5378 pa_file_start_level ();
5379 pa_file_start_mcount ("ENTRY");
5380 pa_file_start_file (0);
5381 }
5382
5383 static void
5384 pa_som_file_start (void)
5385 {
5386 pa_file_start_level ();
5387 pa_file_start_space (0);
5388 aputs ("\t.IMPORT $global$,DATA\n"
5389 "\t.IMPORT $$dyncall,MILLICODE\n");
5390 pa_file_start_mcount ("CODE");
5391 pa_file_start_file (0);
5392 }
5393
5394 static void
5395 pa_linux_file_start (void)
5396 {
5397 pa_file_start_file (1);
5398 pa_file_start_level ();
5399 pa_file_start_mcount ("CODE");
5400 }
5401
5402 static void
5403 pa_hpux64_gas_file_start (void)
5404 {
5405 pa_file_start_level ();
5406 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5407 if (profile_flag)
5408 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5409 #endif
5410 pa_file_start_file (1);
5411 }
5412
5413 static void
5414 pa_hpux64_hpas_file_start (void)
5415 {
5416 pa_file_start_level ();
5417 pa_file_start_space (1);
5418 pa_file_start_mcount ("CODE");
5419 pa_file_start_file (0);
5420 }
5421 #undef aputs
5422
5423 /* Search the deferred plabel list for SYMBOL and return its internal
5424 label. If an entry for SYMBOL is not found, a new entry is created. */
5425
5426 rtx
5427 get_deferred_plabel (rtx symbol)
5428 {
5429 const char *fname = XSTR (symbol, 0);
5430 size_t i;
5431
5432 /* See if we have already put this function on the list of deferred
5433 plabels. This list is generally small, so a liner search is not
5434 too ugly. If it proves too slow replace it with something faster. */
5435 for (i = 0; i < n_deferred_plabels; i++)
5436 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5437 break;
5438
5439 /* If the deferred plabel list is empty, or this entry was not found
5440 on the list, create a new entry on the list. */
5441 if (deferred_plabels == NULL || i == n_deferred_plabels)
5442 {
5443 tree id;
5444
5445 if (deferred_plabels == 0)
5446 deferred_plabels = ggc_alloc_deferred_plabel ();
5447 else
5448 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5449 deferred_plabels,
5450 n_deferred_plabels + 1);
5451
5452 i = n_deferred_plabels++;
5453 deferred_plabels[i].internal_label = gen_label_rtx ();
5454 deferred_plabels[i].symbol = symbol;
5455
5456 /* Gross. We have just implicitly taken the address of this
5457 function. Mark it in the same manner as assemble_name. */
5458 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5459 if (id)
5460 mark_referenced (id);
5461 }
5462
5463 return deferred_plabels[i].internal_label;
5464 }
5465
5466 static void
5467 output_deferred_plabels (void)
5468 {
5469 size_t i;
5470
5471 /* If we have some deferred plabels, then we need to switch into the
5472 data or readonly data section, and align it to a 4 byte boundary
5473 before outputting the deferred plabels. */
5474 if (n_deferred_plabels)
5475 {
5476 switch_to_section (flag_pic ? data_section : readonly_data_section);
5477 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5478 }
5479
5480 /* Now output the deferred plabels. */
5481 for (i = 0; i < n_deferred_plabels; i++)
5482 {
5483 targetm.asm_out.internal_label (asm_out_file, "L",
5484 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5485 assemble_integer (deferred_plabels[i].symbol,
5486 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5487 }
5488 }
5489
5490 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5491 /* Initialize optabs to point to HPUX long double emulation routines. */
5492 static void
5493 pa_hpux_init_libfuncs (void)
5494 {
5495 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5496 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5497 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5498 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5499 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5500 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5501 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5502 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5503 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5504
5505 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5506 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5507 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5508 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5509 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5510 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5511 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5512
5513 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5514 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5515 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5516 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5517
5518 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5519 ? "__U_Qfcnvfxt_quad_to_sgl"
5520 : "_U_Qfcnvfxt_quad_to_sgl");
5521 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5522 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5523 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5524
5525 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5526 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5527 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5528 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
5529 }
5530 #endif
5531
5532 /* HP's millicode routines mean something special to the assembler.
5533 Keep track of which ones we have used. */
5534
5535 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5536 static void import_milli (enum millicodes);
5537 static char imported[(int) end1000];
5538 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5539 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5540 #define MILLI_START 10
5541
5542 static void
5543 import_milli (enum millicodes code)
5544 {
5545 char str[sizeof (import_string)];
5546
5547 if (!imported[(int) code])
5548 {
5549 imported[(int) code] = 1;
5550 strcpy (str, import_string);
5551 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5552 output_asm_insn (str, 0);
5553 }
5554 }
5555
5556 /* The register constraints have put the operands and return value in
5557 the proper registers. */
5558
5559 const char *
5560 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5561 {
5562 import_milli (mulI);
5563 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5564 }
5565
5566 /* Emit the rtl for doing a division by a constant. */
5567
5568 /* Do magic division millicodes exist for this value? */
5569 const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5570
5571 /* We'll use an array to keep track of the magic millicodes and
5572 whether or not we've used them already. [n][0] is signed, [n][1] is
5573 unsigned. */
5574
5575 static int div_milli[16][2];
5576
5577 int
5578 emit_hpdiv_const (rtx *operands, int unsignedp)
5579 {
5580 if (GET_CODE (operands[2]) == CONST_INT
5581 && INTVAL (operands[2]) > 0
5582 && INTVAL (operands[2]) < 16
5583 && magic_milli[INTVAL (operands[2])])
5584 {
5585 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5586
5587 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5588 emit
5589 (gen_rtx_PARALLEL
5590 (VOIDmode,
5591 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5592 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5593 SImode,
5594 gen_rtx_REG (SImode, 26),
5595 operands[2])),
5596 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5597 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5598 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5599 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5600 gen_rtx_CLOBBER (VOIDmode, ret))));
5601 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5602 return 1;
5603 }
5604 return 0;
5605 }
5606
5607 const char *
5608 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5609 {
5610 int divisor;
5611
5612 /* If the divisor is a constant, try to use one of the special
5613 opcodes .*/
5614 if (GET_CODE (operands[0]) == CONST_INT)
5615 {
5616 static char buf[100];
5617 divisor = INTVAL (operands[0]);
5618 if (!div_milli[divisor][unsignedp])
5619 {
5620 div_milli[divisor][unsignedp] = 1;
5621 if (unsignedp)
5622 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5623 else
5624 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5625 }
5626 if (unsignedp)
5627 {
5628 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5629 INTVAL (operands[0]));
5630 return output_millicode_call (insn,
5631 gen_rtx_SYMBOL_REF (SImode, buf));
5632 }
5633 else
5634 {
5635 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5636 INTVAL (operands[0]));
5637 return output_millicode_call (insn,
5638 gen_rtx_SYMBOL_REF (SImode, buf));
5639 }
5640 }
5641 /* Divisor isn't a special constant. */
5642 else
5643 {
5644 if (unsignedp)
5645 {
5646 import_milli (divU);
5647 return output_millicode_call (insn,
5648 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5649 }
5650 else
5651 {
5652 import_milli (divI);
5653 return output_millicode_call (insn,
5654 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5655 }
5656 }
5657 }
5658
5659 /* Output a $$rem millicode to do mod. */
5660
5661 const char *
5662 output_mod_insn (int unsignedp, rtx insn)
5663 {
5664 if (unsignedp)
5665 {
5666 import_milli (remU);
5667 return output_millicode_call (insn,
5668 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5669 }
5670 else
5671 {
5672 import_milli (remI);
5673 return output_millicode_call (insn,
5674 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5675 }
5676 }
5677
5678 void
5679 output_arg_descriptor (rtx call_insn)
5680 {
5681 const char *arg_regs[4];
5682 enum machine_mode arg_mode;
5683 rtx link;
5684 int i, output_flag = 0;
5685 int regno;
5686
5687 /* We neither need nor want argument location descriptors for the
5688 64bit runtime environment or the ELF32 environment. */
5689 if (TARGET_64BIT || TARGET_ELF32)
5690 return;
5691
5692 for (i = 0; i < 4; i++)
5693 arg_regs[i] = 0;
5694
5695 /* Specify explicitly that no argument relocations should take place
5696 if using the portable runtime calling conventions. */
5697 if (TARGET_PORTABLE_RUNTIME)
5698 {
5699 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5700 asm_out_file);
5701 return;
5702 }
5703
5704 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5705 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5706 link; link = XEXP (link, 1))
5707 {
5708 rtx use = XEXP (link, 0);
5709
5710 if (! (GET_CODE (use) == USE
5711 && GET_CODE (XEXP (use, 0)) == REG
5712 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5713 continue;
5714
5715 arg_mode = GET_MODE (XEXP (use, 0));
5716 regno = REGNO (XEXP (use, 0));
5717 if (regno >= 23 && regno <= 26)
5718 {
5719 arg_regs[26 - regno] = "GR";
5720 if (arg_mode == DImode)
5721 arg_regs[25 - regno] = "GR";
5722 }
5723 else if (regno >= 32 && regno <= 39)
5724 {
5725 if (arg_mode == SFmode)
5726 arg_regs[(regno - 32) / 2] = "FR";
5727 else
5728 {
5729 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5730 arg_regs[(regno - 34) / 2] = "FR";
5731 arg_regs[(regno - 34) / 2 + 1] = "FU";
5732 #else
5733 arg_regs[(regno - 34) / 2] = "FU";
5734 arg_regs[(regno - 34) / 2 + 1] = "FR";
5735 #endif
5736 }
5737 }
5738 }
5739 fputs ("\t.CALL ", asm_out_file);
5740 for (i = 0; i < 4; i++)
5741 {
5742 if (arg_regs[i])
5743 {
5744 if (output_flag++)
5745 fputc (',', asm_out_file);
5746 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5747 }
5748 }
5749 fputc ('\n', asm_out_file);
5750 }
5751 \f
5752 static reg_class_t
5753 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5754 enum machine_mode mode, secondary_reload_info *sri)
5755 {
5756 int is_symbolic, regno;
5757 enum reg_class rclass = (enum reg_class) rclass_i;
5758
5759 /* Handle the easy stuff first. */
5760 if (rclass == R1_REGS)
5761 return NO_REGS;
5762
5763 if (REG_P (x))
5764 {
5765 regno = REGNO (x);
5766 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5767 return NO_REGS;
5768 }
5769 else
5770 regno = -1;
5771
5772 /* If we have something like (mem (mem (...)), we can safely assume the
5773 inner MEM will end up in a general register after reloading, so there's
5774 no need for a secondary reload. */
5775 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5776 return NO_REGS;
5777
5778 /* Trying to load a constant into a FP register during PIC code
5779 generation requires %r1 as a scratch register. */
5780 if (flag_pic
5781 && (mode == SImode || mode == DImode)
5782 && FP_REG_CLASS_P (rclass)
5783 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5784 {
5785 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5786 : CODE_FOR_reload_indi_r1);
5787 return NO_REGS;
5788 }
5789
5790 /* Profiling showed the PA port spends about 1.3% of its compilation
5791 time in true_regnum from calls inside pa_secondary_reload_class. */
5792 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5793 regno = true_regnum (x);
5794
5795 /* In order to allow 14-bit displacements in integer loads and stores,
5796 we need to prevent reload from generating out of range integer mode
5797 loads and stores to the floating point registers. Previously, we
5798 used to call for a secondary reload and have emit_move_sequence()
5799 fix the instruction sequence. However, reload occasionally wouldn't
5800 generate the reload and we would end up with an invalid REG+D memory
5801 address. So, now we use an intermediate general register for most
5802 memory loads and stores. */
5803 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5804 && GET_MODE_CLASS (mode) == MODE_INT
5805 && FP_REG_CLASS_P (rclass))
5806 {
5807 /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5808 the secondary reload needed for a pseudo. It never passes a
5809 REG+D address. */
5810 if (GET_CODE (x) == MEM)
5811 {
5812 x = XEXP (x, 0);
5813
5814 /* We don't need an intermediate for indexed and LO_SUM DLT
5815 memory addresses. When INT14_OK_STRICT is true, it might
5816 appear that we could directly allow register indirect
5817 memory addresses. However, this doesn't work because we
5818 don't support SUBREGs in floating-point register copies
5819 and reload doesn't tell us when it's going to use a SUBREG. */
5820 if (IS_INDEX_ADDR_P (x)
5821 || IS_LO_SUM_DLT_ADDR_P (x))
5822 return NO_REGS;
5823
5824 /* Otherwise, we need an intermediate general register. */
5825 return GENERAL_REGS;
5826 }
5827
5828 /* Request a secondary reload with a general scratch register
5829 for everthing else. ??? Could symbolic operands be handled
5830 directly when generating non-pic PA 2.0 code? */
5831 sri->icode = (in_p
5832 ? direct_optab_handler (reload_in_optab, mode)
5833 : direct_optab_handler (reload_out_optab, mode));
5834 return NO_REGS;
5835 }
5836
5837 /* We need a secondary register (GPR) for copies between the SAR
5838 and anything other than a general register. */
5839 if (rclass == SHIFT_REGS && (regno <= 0 || regno >= 32))
5840 {
5841 sri->icode = (in_p
5842 ? direct_optab_handler (reload_in_optab, mode)
5843 : direct_optab_handler (reload_out_optab, mode));
5844 return NO_REGS;
5845 }
5846
5847 /* A SAR<->FP register copy requires a secondary register (GPR) as
5848 well as secondary memory. */
5849 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5850 && (REGNO_REG_CLASS (regno) == SHIFT_REGS
5851 && FP_REG_CLASS_P (rclass)))
5852 {
5853 sri->icode = (in_p
5854 ? direct_optab_handler (reload_in_optab, mode)
5855 : direct_optab_handler (reload_out_optab, mode));
5856 return NO_REGS;
5857 }
5858
5859 /* Secondary reloads of symbolic operands require %r1 as a scratch
5860 register when we're generating PIC code and when the operand isn't
5861 readonly. */
5862 if (GET_CODE (x) == HIGH)
5863 x = XEXP (x, 0);
5864
5865 /* Profiling has showed GCC spends about 2.6% of its compilation
5866 time in symbolic_operand from calls inside pa_secondary_reload_class.
5867 So, we use an inline copy to avoid useless work. */
5868 switch (GET_CODE (x))
5869 {
5870 rtx op;
5871
5872 case SYMBOL_REF:
5873 is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
5874 break;
5875 case LABEL_REF:
5876 is_symbolic = 1;
5877 break;
5878 case CONST:
5879 op = XEXP (x, 0);
5880 is_symbolic = (GET_CODE (op) == PLUS
5881 && ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
5882 && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
5883 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
5884 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5885 break;
5886 default:
5887 is_symbolic = 0;
5888 break;
5889 }
5890
5891 if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
5892 {
5893 gcc_assert (mode == SImode || mode == DImode);
5894 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5895 : CODE_FOR_reload_indi_r1);
5896 }
5897
5898 return NO_REGS;
5899 }
5900
5901 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5902 is only marked as live on entry by df-scan when it is a fixed
5903 register. It isn't a fixed register in the 64-bit runtime,
5904 so we need to mark it here. */
5905
5906 static void
5907 pa_extra_live_on_entry (bitmap regs)
5908 {
5909 if (TARGET_64BIT)
5910 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5911 }
5912
5913 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5914 to prevent it from being deleted. */
5915
5916 rtx
5917 pa_eh_return_handler_rtx (void)
5918 {
5919 rtx tmp;
5920
5921 tmp = gen_rtx_PLUS (word_mode, frame_pointer_rtx,
5922 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5923 tmp = gen_rtx_MEM (word_mode, tmp);
5924 tmp->volatil = 1;
5925 return tmp;
5926 }
5927
5928 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5929 by invisible reference. As a GCC extension, we also pass anything
5930 with a zero or variable size by reference.
5931
5932 The 64-bit runtime does not describe passing any types by invisible
5933 reference. The internals of GCC can't currently handle passing
5934 empty structures, and zero or variable length arrays when they are
5935 not passed entirely on the stack or by reference. Thus, as a GCC
5936 extension, we pass these types by reference. The HP compiler doesn't
5937 support these types, so hopefully there shouldn't be any compatibility
5938 issues. This may have to be revisited when HP releases a C99 compiler
5939 or updates the ABI. */
5940
5941 static bool
5942 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5943 enum machine_mode mode, const_tree type,
5944 bool named ATTRIBUTE_UNUSED)
5945 {
5946 HOST_WIDE_INT size;
5947
5948 if (type)
5949 size = int_size_in_bytes (type);
5950 else
5951 size = GET_MODE_SIZE (mode);
5952
5953 if (TARGET_64BIT)
5954 return size <= 0;
5955 else
5956 return size <= 0 || size > 8;
5957 }
5958
5959 enum direction
5960 function_arg_padding (enum machine_mode mode, const_tree type)
5961 {
5962 if (mode == BLKmode
5963 || (TARGET_64BIT
5964 && type
5965 && (AGGREGATE_TYPE_P (type)
5966 || TREE_CODE (type) == COMPLEX_TYPE
5967 || TREE_CODE (type) == VECTOR_TYPE)))
5968 {
5969 /* Return none if justification is not required. */
5970 if (type
5971 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5972 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5973 return none;
5974
5975 /* The directions set here are ignored when a BLKmode argument larger
5976 than a word is placed in a register. Different code is used for
5977 the stack and registers. This makes it difficult to have a
5978 consistent data representation for both the stack and registers.
5979 For both runtimes, the justification and padding for arguments on
5980 the stack and in registers should be identical. */
5981 if (TARGET_64BIT)
5982 /* The 64-bit runtime specifies left justification for aggregates. */
5983 return upward;
5984 else
5985 /* The 32-bit runtime architecture specifies right justification.
5986 When the argument is passed on the stack, the argument is padded
5987 with garbage on the left. The HP compiler pads with zeros. */
5988 return downward;
5989 }
5990
5991 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5992 return downward;
5993 else
5994 return none;
5995 }
5996
5997 \f
5998 /* Do what is necessary for `va_start'. We look at the current function
5999 to determine if stdargs or varargs is used and fill in an initial
6000 va_list. A pointer to this constructor is returned. */
6001
6002 static rtx
6003 hppa_builtin_saveregs (void)
6004 {
6005 rtx offset, dest;
6006 tree fntype = TREE_TYPE (current_function_decl);
6007 int argadj = ((!stdarg_p (fntype))
6008 ? UNITS_PER_WORD : 0);
6009
6010 if (argadj)
6011 offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
6012 else
6013 offset = crtl->args.arg_offset_rtx;
6014
6015 if (TARGET_64BIT)
6016 {
6017 int i, off;
6018
6019 /* Adjust for varargs/stdarg differences. */
6020 if (argadj)
6021 offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
6022 else
6023 offset = crtl->args.arg_offset_rtx;
6024
6025 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6026 from the incoming arg pointer and growing to larger addresses. */
6027 for (i = 26, off = -64; i >= 19; i--, off += 8)
6028 emit_move_insn (gen_rtx_MEM (word_mode,
6029 plus_constant (arg_pointer_rtx, off)),
6030 gen_rtx_REG (word_mode, i));
6031
6032 /* The incoming args pointer points just beyond the flushback area;
6033 normally this is not a serious concern. However, when we are doing
6034 varargs/stdargs we want to make the arg pointer point to the start
6035 of the incoming argument area. */
6036 emit_move_insn (virtual_incoming_args_rtx,
6037 plus_constant (arg_pointer_rtx, -64));
6038
6039 /* Now return a pointer to the first anonymous argument. */
6040 return copy_to_reg (expand_binop (Pmode, add_optab,
6041 virtual_incoming_args_rtx,
6042 offset, 0, 0, OPTAB_LIB_WIDEN));
6043 }
6044
6045 /* Store general registers on the stack. */
6046 dest = gen_rtx_MEM (BLKmode,
6047 plus_constant (crtl->args.internal_arg_pointer,
6048 -16));
6049 set_mem_alias_set (dest, get_varargs_alias_set ());
6050 set_mem_align (dest, BITS_PER_WORD);
6051 move_block_from_reg (23, dest, 4);
6052
6053 /* move_block_from_reg will emit code to store the argument registers
6054 individually as scalar stores.
6055
6056 However, other insns may later load from the same addresses for
6057 a structure load (passing a struct to a varargs routine).
6058
6059 The alias code assumes that such aliasing can never happen, so we
6060 have to keep memory referencing insns from moving up beyond the
6061 last argument register store. So we emit a blockage insn here. */
6062 emit_insn (gen_blockage ());
6063
6064 return copy_to_reg (expand_binop (Pmode, add_optab,
6065 crtl->args.internal_arg_pointer,
6066 offset, 0, 0, OPTAB_LIB_WIDEN));
6067 }
6068
6069 static void
6070 hppa_va_start (tree valist, rtx nextarg)
6071 {
6072 nextarg = expand_builtin_saveregs ();
6073 std_expand_builtin_va_start (valist, nextarg);
6074 }
6075
6076 static tree
6077 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6078 gimple_seq *post_p)
6079 {
6080 if (TARGET_64BIT)
6081 {
6082 /* Args grow upward. We can use the generic routines. */
6083 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6084 }
6085 else /* !TARGET_64BIT */
6086 {
6087 tree ptr = build_pointer_type (type);
6088 tree valist_type;
6089 tree t, u;
6090 unsigned int size, ofs;
6091 bool indirect;
6092
6093 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6094 if (indirect)
6095 {
6096 type = ptr;
6097 ptr = build_pointer_type (type);
6098 }
6099 size = int_size_in_bytes (type);
6100 valist_type = TREE_TYPE (valist);
6101
6102 /* Args grow down. Not handled by generic routines. */
6103
6104 u = fold_convert (sizetype, size_in_bytes (type));
6105 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6106 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
6107
6108 /* Align to 4 or 8 byte boundary depending on argument size. */
6109
6110 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6111 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6112 t = fold_convert (valist_type, t);
6113
6114 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6115
6116 ofs = (8 - size) % 4;
6117 if (ofs != 0)
6118 {
6119 u = size_int (ofs);
6120 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
6121 }
6122
6123 t = fold_convert (ptr, t);
6124 t = build_va_arg_indirect_ref (t);
6125
6126 if (indirect)
6127 t = build_va_arg_indirect_ref (t);
6128
6129 return t;
6130 }
6131 }
6132
6133 /* True if MODE is valid for the target. By "valid", we mean able to
6134 be manipulated in non-trivial ways. In particular, this means all
6135 the arithmetic is supported.
6136
6137 Currently, TImode is not valid as the HP 64-bit runtime documentation
6138 doesn't document the alignment and calling conventions for this type.
6139 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6140 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6141
6142 static bool
6143 pa_scalar_mode_supported_p (enum machine_mode mode)
6144 {
6145 int precision = GET_MODE_PRECISION (mode);
6146
6147 switch (GET_MODE_CLASS (mode))
6148 {
6149 case MODE_PARTIAL_INT:
6150 case MODE_INT:
6151 if (precision == CHAR_TYPE_SIZE)
6152 return true;
6153 if (precision == SHORT_TYPE_SIZE)
6154 return true;
6155 if (precision == INT_TYPE_SIZE)
6156 return true;
6157 if (precision == LONG_TYPE_SIZE)
6158 return true;
6159 if (precision == LONG_LONG_TYPE_SIZE)
6160 return true;
6161 return false;
6162
6163 case MODE_FLOAT:
6164 if (precision == FLOAT_TYPE_SIZE)
6165 return true;
6166 if (precision == DOUBLE_TYPE_SIZE)
6167 return true;
6168 if (precision == LONG_DOUBLE_TYPE_SIZE)
6169 return true;
6170 return false;
6171
6172 case MODE_DECIMAL_FLOAT:
6173 return false;
6174
6175 default:
6176 gcc_unreachable ();
6177 }
6178 }
6179
6180 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6181 it branches to the next real instruction. Otherwise, return FALSE. */
6182
6183 static bool
6184 branch_to_delay_slot_p (rtx insn)
6185 {
6186 if (dbr_sequence_length ())
6187 return FALSE;
6188
6189 return next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn);
6190 }
6191
6192 /* Return TRUE if INSN, a jump insn, needs a nop in its delay slot.
6193
6194 This occurs when INSN has an unfilled delay slot and is followed
6195 by an ASM_INPUT. Disaster can occur if the ASM_INPUT is empty and
6196 the jump branches into the delay slot. So, we add a nop in the delay
6197 slot just to be safe. This messes up our instruction count, but we
6198 don't know how big the ASM_INPUT insn is anyway. */
6199
6200 static bool
6201 branch_needs_nop_p (rtx insn)
6202 {
6203 rtx next_insn;
6204
6205 if (dbr_sequence_length ())
6206 return FALSE;
6207
6208 next_insn = next_real_insn (insn);
6209 return GET_CODE (PATTERN (next_insn)) == ASM_INPUT;
6210 }
6211
6212 /* This routine handles all the normal conditional branch sequences we
6213 might need to generate. It handles compare immediate vs compare
6214 register, nullification of delay slots, varying length branches,
6215 negated branches, and all combinations of the above. It returns the
6216 output appropriate to emit the branch corresponding to all given
6217 parameters. */
6218
6219 const char *
6220 output_cbranch (rtx *operands, int negated, rtx insn)
6221 {
6222 static char buf[100];
6223 int useskip = 0;
6224 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6225 int length = get_attr_length (insn);
6226 int xdelay;
6227
6228 /* A conditional branch to the following instruction (e.g. the delay slot)
6229 is asking for a disaster. This can happen when not optimizing and
6230 when jump optimization fails.
6231
6232 While it is usually safe to emit nothing, this can fail if the
6233 preceding instruction is a nullified branch with an empty delay
6234 slot and the same branch target as this branch. We could check
6235 for this but jump optimization should eliminate nop jumps. It
6236 is always safe to emit a nop. */
6237 if (branch_to_delay_slot_p (insn))
6238 return "nop";
6239
6240 /* The doubleword form of the cmpib instruction doesn't have the LEU
6241 and GTU conditions while the cmpb instruction does. Since we accept
6242 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6243 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6244 operands[2] = gen_rtx_REG (DImode, 0);
6245 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6246 operands[1] = gen_rtx_REG (DImode, 0);
6247
6248 /* If this is a long branch with its delay slot unfilled, set `nullify'
6249 as it can nullify the delay slot and save a nop. */
6250 if (length == 8 && dbr_sequence_length () == 0)
6251 nullify = 1;
6252
6253 /* If this is a short forward conditional branch which did not get
6254 its delay slot filled, the delay slot can still be nullified. */
6255 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6256 nullify = forward_branch_p (insn);
6257
6258 /* A forward branch over a single nullified insn can be done with a
6259 comclr instruction. This avoids a single cycle penalty due to
6260 mis-predicted branch if we fall through (branch not taken). */
6261 if (length == 4
6262 && next_real_insn (insn) != 0
6263 && get_attr_length (next_real_insn (insn)) == 4
6264 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6265 && nullify)
6266 useskip = 1;
6267
6268 switch (length)
6269 {
6270 /* All short conditional branches except backwards with an unfilled
6271 delay slot. */
6272 case 4:
6273 if (useskip)
6274 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6275 else
6276 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6277 if (GET_MODE (operands[1]) == DImode)
6278 strcat (buf, "*");
6279 if (negated)
6280 strcat (buf, "%B3");
6281 else
6282 strcat (buf, "%S3");
6283 if (useskip)
6284 strcat (buf, " %2,%r1,%%r0");
6285 else if (nullify)
6286 {
6287 if (branch_needs_nop_p (insn))
6288 strcat (buf, ",n %2,%r1,%0%#");
6289 else
6290 strcat (buf, ",n %2,%r1,%0");
6291 }
6292 else
6293 strcat (buf, " %2,%r1,%0");
6294 break;
6295
6296 /* All long conditionals. Note a short backward branch with an
6297 unfilled delay slot is treated just like a long backward branch
6298 with an unfilled delay slot. */
6299 case 8:
6300 /* Handle weird backwards branch with a filled delay slot
6301 which is nullified. */
6302 if (dbr_sequence_length () != 0
6303 && ! forward_branch_p (insn)
6304 && nullify)
6305 {
6306 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6307 if (GET_MODE (operands[1]) == DImode)
6308 strcat (buf, "*");
6309 if (negated)
6310 strcat (buf, "%S3");
6311 else
6312 strcat (buf, "%B3");
6313 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6314 }
6315 /* Handle short backwards branch with an unfilled delay slot.
6316 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6317 taken and untaken branches. */
6318 else if (dbr_sequence_length () == 0
6319 && ! forward_branch_p (insn)
6320 && INSN_ADDRESSES_SET_P ()
6321 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6322 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6323 {
6324 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6325 if (GET_MODE (operands[1]) == DImode)
6326 strcat (buf, "*");
6327 if (negated)
6328 strcat (buf, "%B3 %2,%r1,%0%#");
6329 else
6330 strcat (buf, "%S3 %2,%r1,%0%#");
6331 }
6332 else
6333 {
6334 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6335 if (GET_MODE (operands[1]) == DImode)
6336 strcat (buf, "*");
6337 if (negated)
6338 strcat (buf, "%S3");
6339 else
6340 strcat (buf, "%B3");
6341 if (nullify)
6342 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6343 else
6344 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6345 }
6346 break;
6347
6348 default:
6349 /* The reversed conditional branch must branch over one additional
6350 instruction if the delay slot is filled and needs to be extracted
6351 by output_lbranch. If the delay slot is empty or this is a
6352 nullified forward branch, the instruction after the reversed
6353 condition branch must be nullified. */
6354 if (dbr_sequence_length () == 0
6355 || (nullify && forward_branch_p (insn)))
6356 {
6357 nullify = 1;
6358 xdelay = 0;
6359 operands[4] = GEN_INT (length);
6360 }
6361 else
6362 {
6363 xdelay = 1;
6364 operands[4] = GEN_INT (length + 4);
6365 }
6366
6367 /* Create a reversed conditional branch which branches around
6368 the following insns. */
6369 if (GET_MODE (operands[1]) != DImode)
6370 {
6371 if (nullify)
6372 {
6373 if (negated)
6374 strcpy (buf,
6375 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6376 else
6377 strcpy (buf,
6378 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6379 }
6380 else
6381 {
6382 if (negated)
6383 strcpy (buf,
6384 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6385 else
6386 strcpy (buf,
6387 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6388 }
6389 }
6390 else
6391 {
6392 if (nullify)
6393 {
6394 if (negated)
6395 strcpy (buf,
6396 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6397 else
6398 strcpy (buf,
6399 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6400 }
6401 else
6402 {
6403 if (negated)
6404 strcpy (buf,
6405 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6406 else
6407 strcpy (buf,
6408 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6409 }
6410 }
6411
6412 output_asm_insn (buf, operands);
6413 return output_lbranch (operands[0], insn, xdelay);
6414 }
6415 return buf;
6416 }
6417
6418 /* This routine handles output of long unconditional branches that
6419 exceed the maximum range of a simple branch instruction. Since
6420 we don't have a register available for the branch, we save register
6421 %r1 in the frame marker, load the branch destination DEST into %r1,
6422 execute the branch, and restore %r1 in the delay slot of the branch.
6423
6424 Since long branches may have an insn in the delay slot and the
6425 delay slot is used to restore %r1, we in general need to extract
6426 this insn and execute it before the branch. However, to facilitate
6427 use of this function by conditional branches, we also provide an
6428 option to not extract the delay insn so that it will be emitted
6429 after the long branch. So, if there is an insn in the delay slot,
6430 it is extracted if XDELAY is nonzero.
6431
6432 The lengths of the various long-branch sequences are 20, 16 and 24
6433 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6434
6435 const char *
6436 output_lbranch (rtx dest, rtx insn, int xdelay)
6437 {
6438 rtx xoperands[2];
6439
6440 xoperands[0] = dest;
6441
6442 /* First, free up the delay slot. */
6443 if (xdelay && dbr_sequence_length () != 0)
6444 {
6445 /* We can't handle a jump in the delay slot. */
6446 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6447
6448 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6449 optimize, 0, NULL);
6450
6451 /* Now delete the delay insn. */
6452 SET_INSN_DELETED (NEXT_INSN (insn));
6453 }
6454
6455 /* Output an insn to save %r1. The runtime documentation doesn't
6456 specify whether the "Clean Up" slot in the callers frame can
6457 be clobbered by the callee. It isn't copied by HP's builtin
6458 alloca, so this suggests that it can be clobbered if necessary.
6459 The "Static Link" location is copied by HP builtin alloca, so
6460 we avoid using it. Using the cleanup slot might be a problem
6461 if we have to interoperate with languages that pass cleanup
6462 information. However, it should be possible to handle these
6463 situations with GCC's asm feature.
6464
6465 The "Current RP" slot is reserved for the called procedure, so
6466 we try to use it when we don't have a frame of our own. It's
6467 rather unlikely that we won't have a frame when we need to emit
6468 a very long branch.
6469
6470 Really the way to go long term is a register scavenger; goto
6471 the target of the jump and find a register which we can use
6472 as a scratch to hold the value in %r1. Then, we wouldn't have
6473 to free up the delay slot or clobber a slot that may be needed
6474 for other purposes. */
6475 if (TARGET_64BIT)
6476 {
6477 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6478 /* Use the return pointer slot in the frame marker. */
6479 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6480 else
6481 /* Use the slot at -40 in the frame marker since HP builtin
6482 alloca doesn't copy it. */
6483 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6484 }
6485 else
6486 {
6487 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6488 /* Use the return pointer slot in the frame marker. */
6489 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6490 else
6491 /* Use the "Clean Up" slot in the frame marker. In GCC,
6492 the only other use of this location is for copying a
6493 floating point double argument from a floating-point
6494 register to two general registers. The copy is done
6495 as an "atomic" operation when outputting a call, so it
6496 won't interfere with our using the location here. */
6497 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6498 }
6499
6500 if (TARGET_PORTABLE_RUNTIME)
6501 {
6502 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6503 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6504 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6505 }
6506 else if (flag_pic)
6507 {
6508 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6509 if (TARGET_SOM || !TARGET_GAS)
6510 {
6511 xoperands[1] = gen_label_rtx ();
6512 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6513 targetm.asm_out.internal_label (asm_out_file, "L",
6514 CODE_LABEL_NUMBER (xoperands[1]));
6515 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6516 }
6517 else
6518 {
6519 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6520 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6521 }
6522 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6523 }
6524 else
6525 /* Now output a very long branch to the original target. */
6526 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6527
6528 /* Now restore the value of %r1 in the delay slot. */
6529 if (TARGET_64BIT)
6530 {
6531 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6532 return "ldd -16(%%r30),%%r1";
6533 else
6534 return "ldd -40(%%r30),%%r1";
6535 }
6536 else
6537 {
6538 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6539 return "ldw -20(%%r30),%%r1";
6540 else
6541 return "ldw -12(%%r30),%%r1";
6542 }
6543 }
6544
6545 /* This routine handles all the branch-on-bit conditional branch sequences we
6546 might need to generate. It handles nullification of delay slots,
6547 varying length branches, negated branches and all combinations of the
6548 above. it returns the appropriate output template to emit the branch. */
6549
6550 const char *
6551 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6552 {
6553 static char buf[100];
6554 int useskip = 0;
6555 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6556 int length = get_attr_length (insn);
6557 int xdelay;
6558
6559 /* A conditional branch to the following instruction (e.g. the delay slot) is
6560 asking for a disaster. I do not think this can happen as this pattern
6561 is only used when optimizing; jump optimization should eliminate the
6562 jump. But be prepared just in case. */
6563
6564 if (branch_to_delay_slot_p (insn))
6565 return "nop";
6566
6567 /* If this is a long branch with its delay slot unfilled, set `nullify'
6568 as it can nullify the delay slot and save a nop. */
6569 if (length == 8 && dbr_sequence_length () == 0)
6570 nullify = 1;
6571
6572 /* If this is a short forward conditional branch which did not get
6573 its delay slot filled, the delay slot can still be nullified. */
6574 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6575 nullify = forward_branch_p (insn);
6576
6577 /* A forward branch over a single nullified insn can be done with a
6578 extrs instruction. This avoids a single cycle penalty due to
6579 mis-predicted branch if we fall through (branch not taken). */
6580
6581 if (length == 4
6582 && next_real_insn (insn) != 0
6583 && get_attr_length (next_real_insn (insn)) == 4
6584 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6585 && nullify)
6586 useskip = 1;
6587
6588 switch (length)
6589 {
6590
6591 /* All short conditional branches except backwards with an unfilled
6592 delay slot. */
6593 case 4:
6594 if (useskip)
6595 strcpy (buf, "{extrs,|extrw,s,}");
6596 else
6597 strcpy (buf, "bb,");
6598 if (useskip && GET_MODE (operands[0]) == DImode)
6599 strcpy (buf, "extrd,s,*");
6600 else if (GET_MODE (operands[0]) == DImode)
6601 strcpy (buf, "bb,*");
6602 if ((which == 0 && negated)
6603 || (which == 1 && ! negated))
6604 strcat (buf, ">=");
6605 else
6606 strcat (buf, "<");
6607 if (useskip)
6608 strcat (buf, " %0,%1,1,%%r0");
6609 else if (nullify && negated)
6610 {
6611 if (branch_needs_nop_p (insn))
6612 strcat (buf, ",n %0,%1,%3%#");
6613 else
6614 strcat (buf, ",n %0,%1,%3");
6615 }
6616 else if (nullify && ! negated)
6617 {
6618 if (branch_needs_nop_p (insn))
6619 strcat (buf, ",n %0,%1,%2%#");
6620 else
6621 strcat (buf, ",n %0,%1,%2");
6622 }
6623 else if (! nullify && negated)
6624 strcat (buf, " %0,%1,%3");
6625 else if (! nullify && ! negated)
6626 strcat (buf, " %0,%1,%2");
6627 break;
6628
6629 /* All long conditionals. Note a short backward branch with an
6630 unfilled delay slot is treated just like a long backward branch
6631 with an unfilled delay slot. */
6632 case 8:
6633 /* Handle weird backwards branch with a filled delay slot
6634 which is nullified. */
6635 if (dbr_sequence_length () != 0
6636 && ! forward_branch_p (insn)
6637 && nullify)
6638 {
6639 strcpy (buf, "bb,");
6640 if (GET_MODE (operands[0]) == DImode)
6641 strcat (buf, "*");
6642 if ((which == 0 && negated)
6643 || (which == 1 && ! negated))
6644 strcat (buf, "<");
6645 else
6646 strcat (buf, ">=");
6647 if (negated)
6648 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6649 else
6650 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6651 }
6652 /* Handle short backwards branch with an unfilled delay slot.
6653 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6654 taken and untaken branches. */
6655 else if (dbr_sequence_length () == 0
6656 && ! forward_branch_p (insn)
6657 && INSN_ADDRESSES_SET_P ()
6658 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6659 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6660 {
6661 strcpy (buf, "bb,");
6662 if (GET_MODE (operands[0]) == DImode)
6663 strcat (buf, "*");
6664 if ((which == 0 && negated)
6665 || (which == 1 && ! negated))
6666 strcat (buf, ">=");
6667 else
6668 strcat (buf, "<");
6669 if (negated)
6670 strcat (buf, " %0,%1,%3%#");
6671 else
6672 strcat (buf, " %0,%1,%2%#");
6673 }
6674 else
6675 {
6676 if (GET_MODE (operands[0]) == DImode)
6677 strcpy (buf, "extrd,s,*");
6678 else
6679 strcpy (buf, "{extrs,|extrw,s,}");
6680 if ((which == 0 && negated)
6681 || (which == 1 && ! negated))
6682 strcat (buf, "<");
6683 else
6684 strcat (buf, ">=");
6685 if (nullify && negated)
6686 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6687 else if (nullify && ! negated)
6688 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6689 else if (negated)
6690 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6691 else
6692 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6693 }
6694 break;
6695
6696 default:
6697 /* The reversed conditional branch must branch over one additional
6698 instruction if the delay slot is filled and needs to be extracted
6699 by output_lbranch. If the delay slot is empty or this is a
6700 nullified forward branch, the instruction after the reversed
6701 condition branch must be nullified. */
6702 if (dbr_sequence_length () == 0
6703 || (nullify && forward_branch_p (insn)))
6704 {
6705 nullify = 1;
6706 xdelay = 0;
6707 operands[4] = GEN_INT (length);
6708 }
6709 else
6710 {
6711 xdelay = 1;
6712 operands[4] = GEN_INT (length + 4);
6713 }
6714
6715 if (GET_MODE (operands[0]) == DImode)
6716 strcpy (buf, "bb,*");
6717 else
6718 strcpy (buf, "bb,");
6719 if ((which == 0 && negated)
6720 || (which == 1 && !negated))
6721 strcat (buf, "<");
6722 else
6723 strcat (buf, ">=");
6724 if (nullify)
6725 strcat (buf, ",n %0,%1,.+%4");
6726 else
6727 strcat (buf, " %0,%1,.+%4");
6728 output_asm_insn (buf, operands);
6729 return output_lbranch (negated ? operands[3] : operands[2],
6730 insn, xdelay);
6731 }
6732 return buf;
6733 }
6734
6735 /* This routine handles all the branch-on-variable-bit conditional branch
6736 sequences we might need to generate. It handles nullification of delay
6737 slots, varying length branches, negated branches and all combinations
6738 of the above. it returns the appropriate output template to emit the
6739 branch. */
6740
6741 const char *
6742 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6743 {
6744 static char buf[100];
6745 int useskip = 0;
6746 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6747 int length = get_attr_length (insn);
6748 int xdelay;
6749
6750 /* A conditional branch to the following instruction (e.g. the delay slot) is
6751 asking for a disaster. I do not think this can happen as this pattern
6752 is only used when optimizing; jump optimization should eliminate the
6753 jump. But be prepared just in case. */
6754
6755 if (branch_to_delay_slot_p (insn))
6756 return "nop";
6757
6758 /* If this is a long branch with its delay slot unfilled, set `nullify'
6759 as it can nullify the delay slot and save a nop. */
6760 if (length == 8 && dbr_sequence_length () == 0)
6761 nullify = 1;
6762
6763 /* If this is a short forward conditional branch which did not get
6764 its delay slot filled, the delay slot can still be nullified. */
6765 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6766 nullify = forward_branch_p (insn);
6767
6768 /* A forward branch over a single nullified insn can be done with a
6769 extrs instruction. This avoids a single cycle penalty due to
6770 mis-predicted branch if we fall through (branch not taken). */
6771
6772 if (length == 4
6773 && next_real_insn (insn) != 0
6774 && get_attr_length (next_real_insn (insn)) == 4
6775 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6776 && nullify)
6777 useskip = 1;
6778
6779 switch (length)
6780 {
6781
6782 /* All short conditional branches except backwards with an unfilled
6783 delay slot. */
6784 case 4:
6785 if (useskip)
6786 strcpy (buf, "{vextrs,|extrw,s,}");
6787 else
6788 strcpy (buf, "{bvb,|bb,}");
6789 if (useskip && GET_MODE (operands[0]) == DImode)
6790 strcpy (buf, "extrd,s,*");
6791 else if (GET_MODE (operands[0]) == DImode)
6792 strcpy (buf, "bb,*");
6793 if ((which == 0 && negated)
6794 || (which == 1 && ! negated))
6795 strcat (buf, ">=");
6796 else
6797 strcat (buf, "<");
6798 if (useskip)
6799 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6800 else if (nullify && negated)
6801 {
6802 if (branch_needs_nop_p (insn))
6803 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6804 else
6805 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6806 }
6807 else if (nullify && ! negated)
6808 {
6809 if (branch_needs_nop_p (insn))
6810 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6811 else
6812 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6813 }
6814 else if (! nullify && negated)
6815 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6816 else if (! nullify && ! negated)
6817 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6818 break;
6819
6820 /* All long conditionals. Note a short backward branch with an
6821 unfilled delay slot is treated just like a long backward branch
6822 with an unfilled delay slot. */
6823 case 8:
6824 /* Handle weird backwards branch with a filled delay slot
6825 which is nullified. */
6826 if (dbr_sequence_length () != 0
6827 && ! forward_branch_p (insn)
6828 && nullify)
6829 {
6830 strcpy (buf, "{bvb,|bb,}");
6831 if (GET_MODE (operands[0]) == DImode)
6832 strcat (buf, "*");
6833 if ((which == 0 && negated)
6834 || (which == 1 && ! negated))
6835 strcat (buf, "<");
6836 else
6837 strcat (buf, ">=");
6838 if (negated)
6839 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6840 else
6841 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6842 }
6843 /* Handle short backwards branch with an unfilled delay slot.
6844 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6845 taken and untaken branches. */
6846 else if (dbr_sequence_length () == 0
6847 && ! forward_branch_p (insn)
6848 && INSN_ADDRESSES_SET_P ()
6849 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6850 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6851 {
6852 strcpy (buf, "{bvb,|bb,}");
6853 if (GET_MODE (operands[0]) == DImode)
6854 strcat (buf, "*");
6855 if ((which == 0 && negated)
6856 || (which == 1 && ! negated))
6857 strcat (buf, ">=");
6858 else
6859 strcat (buf, "<");
6860 if (negated)
6861 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6862 else
6863 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6864 }
6865 else
6866 {
6867 strcpy (buf, "{vextrs,|extrw,s,}");
6868 if (GET_MODE (operands[0]) == DImode)
6869 strcpy (buf, "extrd,s,*");
6870 if ((which == 0 && negated)
6871 || (which == 1 && ! negated))
6872 strcat (buf, "<");
6873 else
6874 strcat (buf, ">=");
6875 if (nullify && negated)
6876 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6877 else if (nullify && ! negated)
6878 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6879 else if (negated)
6880 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6881 else
6882 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6883 }
6884 break;
6885
6886 default:
6887 /* The reversed conditional branch must branch over one additional
6888 instruction if the delay slot is filled and needs to be extracted
6889 by output_lbranch. If the delay slot is empty or this is a
6890 nullified forward branch, the instruction after the reversed
6891 condition branch must be nullified. */
6892 if (dbr_sequence_length () == 0
6893 || (nullify && forward_branch_p (insn)))
6894 {
6895 nullify = 1;
6896 xdelay = 0;
6897 operands[4] = GEN_INT (length);
6898 }
6899 else
6900 {
6901 xdelay = 1;
6902 operands[4] = GEN_INT (length + 4);
6903 }
6904
6905 if (GET_MODE (operands[0]) == DImode)
6906 strcpy (buf, "bb,*");
6907 else
6908 strcpy (buf, "{bvb,|bb,}");
6909 if ((which == 0 && negated)
6910 || (which == 1 && !negated))
6911 strcat (buf, "<");
6912 else
6913 strcat (buf, ">=");
6914 if (nullify)
6915 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6916 else
6917 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6918 output_asm_insn (buf, operands);
6919 return output_lbranch (negated ? operands[3] : operands[2],
6920 insn, xdelay);
6921 }
6922 return buf;
6923 }
6924
6925 /* Return the output template for emitting a dbra type insn.
6926
6927 Note it may perform some output operations on its own before
6928 returning the final output string. */
6929 const char *
6930 output_dbra (rtx *operands, rtx insn, int which_alternative)
6931 {
6932 int length = get_attr_length (insn);
6933
6934 /* A conditional branch to the following instruction (e.g. the delay slot) is
6935 asking for a disaster. Be prepared! */
6936
6937 if (branch_to_delay_slot_p (insn))
6938 {
6939 if (which_alternative == 0)
6940 return "ldo %1(%0),%0";
6941 else if (which_alternative == 1)
6942 {
6943 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6944 output_asm_insn ("ldw -16(%%r30),%4", operands);
6945 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6946 return "{fldws|fldw} -16(%%r30),%0";
6947 }
6948 else
6949 {
6950 output_asm_insn ("ldw %0,%4", operands);
6951 return "ldo %1(%4),%4\n\tstw %4,%0";
6952 }
6953 }
6954
6955 if (which_alternative == 0)
6956 {
6957 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6958 int xdelay;
6959
6960 /* If this is a long branch with its delay slot unfilled, set `nullify'
6961 as it can nullify the delay slot and save a nop. */
6962 if (length == 8 && dbr_sequence_length () == 0)
6963 nullify = 1;
6964
6965 /* If this is a short forward conditional branch which did not get
6966 its delay slot filled, the delay slot can still be nullified. */
6967 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6968 nullify = forward_branch_p (insn);
6969
6970 switch (length)
6971 {
6972 case 4:
6973 if (nullify)
6974 {
6975 if (branch_needs_nop_p (insn))
6976 return "addib,%C2,n %1,%0,%3%#";
6977 else
6978 return "addib,%C2,n %1,%0,%3";
6979 }
6980 else
6981 return "addib,%C2 %1,%0,%3";
6982
6983 case 8:
6984 /* Handle weird backwards branch with a fulled delay slot
6985 which is nullified. */
6986 if (dbr_sequence_length () != 0
6987 && ! forward_branch_p (insn)
6988 && nullify)
6989 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6990 /* Handle short backwards branch with an unfilled delay slot.
6991 Using a addb;nop rather than addi;bl saves 1 cycle for both
6992 taken and untaken branches. */
6993 else if (dbr_sequence_length () == 0
6994 && ! forward_branch_p (insn)
6995 && INSN_ADDRESSES_SET_P ()
6996 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6997 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6998 return "addib,%C2 %1,%0,%3%#";
6999
7000 /* Handle normal cases. */
7001 if (nullify)
7002 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7003 else
7004 return "addi,%N2 %1,%0,%0\n\tb %3";
7005
7006 default:
7007 /* The reversed conditional branch must branch over one additional
7008 instruction if the delay slot is filled and needs to be extracted
7009 by output_lbranch. If the delay slot is empty or this is a
7010 nullified forward branch, the instruction after the reversed
7011 condition branch must be nullified. */
7012 if (dbr_sequence_length () == 0
7013 || (nullify && forward_branch_p (insn)))
7014 {
7015 nullify = 1;
7016 xdelay = 0;
7017 operands[4] = GEN_INT (length);
7018 }
7019 else
7020 {
7021 xdelay = 1;
7022 operands[4] = GEN_INT (length + 4);
7023 }
7024
7025 if (nullify)
7026 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7027 else
7028 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7029
7030 return output_lbranch (operands[3], insn, xdelay);
7031 }
7032
7033 }
7034 /* Deal with gross reload from FP register case. */
7035 else if (which_alternative == 1)
7036 {
7037 /* Move loop counter from FP register to MEM then into a GR,
7038 increment the GR, store the GR into MEM, and finally reload
7039 the FP register from MEM from within the branch's delay slot. */
7040 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7041 operands);
7042 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7043 if (length == 24)
7044 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7045 else if (length == 28)
7046 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7047 else
7048 {
7049 operands[5] = GEN_INT (length - 16);
7050 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7051 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7052 return output_lbranch (operands[3], insn, 0);
7053 }
7054 }
7055 /* Deal with gross reload from memory case. */
7056 else
7057 {
7058 /* Reload loop counter from memory, the store back to memory
7059 happens in the branch's delay slot. */
7060 output_asm_insn ("ldw %0,%4", operands);
7061 if (length == 12)
7062 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7063 else if (length == 16)
7064 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7065 else
7066 {
7067 operands[5] = GEN_INT (length - 4);
7068 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7069 return output_lbranch (operands[3], insn, 0);
7070 }
7071 }
7072 }
7073
7074 /* Return the output template for emitting a movb type insn.
7075
7076 Note it may perform some output operations on its own before
7077 returning the final output string. */
7078 const char *
7079 output_movb (rtx *operands, rtx insn, int which_alternative,
7080 int reverse_comparison)
7081 {
7082 int length = get_attr_length (insn);
7083
7084 /* A conditional branch to the following instruction (e.g. the delay slot) is
7085 asking for a disaster. Be prepared! */
7086
7087 if (branch_to_delay_slot_p (insn))
7088 {
7089 if (which_alternative == 0)
7090 return "copy %1,%0";
7091 else if (which_alternative == 1)
7092 {
7093 output_asm_insn ("stw %1,-16(%%r30)", operands);
7094 return "{fldws|fldw} -16(%%r30),%0";
7095 }
7096 else if (which_alternative == 2)
7097 return "stw %1,%0";
7098 else
7099 return "mtsar %r1";
7100 }
7101
7102 /* Support the second variant. */
7103 if (reverse_comparison)
7104 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7105
7106 if (which_alternative == 0)
7107 {
7108 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7109 int xdelay;
7110
7111 /* If this is a long branch with its delay slot unfilled, set `nullify'
7112 as it can nullify the delay slot and save a nop. */
7113 if (length == 8 && dbr_sequence_length () == 0)
7114 nullify = 1;
7115
7116 /* If this is a short forward conditional branch which did not get
7117 its delay slot filled, the delay slot can still be nullified. */
7118 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7119 nullify = forward_branch_p (insn);
7120
7121 switch (length)
7122 {
7123 case 4:
7124 if (nullify)
7125 {
7126 if (branch_needs_nop_p (insn))
7127 return "movb,%C2,n %1,%0,%3%#";
7128 else
7129 return "movb,%C2,n %1,%0,%3";
7130 }
7131 else
7132 return "movb,%C2 %1,%0,%3";
7133
7134 case 8:
7135 /* Handle weird backwards branch with a filled delay slot
7136 which is nullified. */
7137 if (dbr_sequence_length () != 0
7138 && ! forward_branch_p (insn)
7139 && nullify)
7140 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7141
7142 /* Handle short backwards branch with an unfilled delay slot.
7143 Using a movb;nop rather than or;bl saves 1 cycle for both
7144 taken and untaken branches. */
7145 else if (dbr_sequence_length () == 0
7146 && ! forward_branch_p (insn)
7147 && INSN_ADDRESSES_SET_P ()
7148 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7149 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7150 return "movb,%C2 %1,%0,%3%#";
7151 /* Handle normal cases. */
7152 if (nullify)
7153 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7154 else
7155 return "or,%N2 %1,%%r0,%0\n\tb %3";
7156
7157 default:
7158 /* The reversed conditional branch must branch over one additional
7159 instruction if the delay slot is filled and needs to be extracted
7160 by output_lbranch. If the delay slot is empty or this is a
7161 nullified forward branch, the instruction after the reversed
7162 condition branch must be nullified. */
7163 if (dbr_sequence_length () == 0
7164 || (nullify && forward_branch_p (insn)))
7165 {
7166 nullify = 1;
7167 xdelay = 0;
7168 operands[4] = GEN_INT (length);
7169 }
7170 else
7171 {
7172 xdelay = 1;
7173 operands[4] = GEN_INT (length + 4);
7174 }
7175
7176 if (nullify)
7177 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7178 else
7179 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7180
7181 return output_lbranch (operands[3], insn, xdelay);
7182 }
7183 }
7184 /* Deal with gross reload for FP destination register case. */
7185 else if (which_alternative == 1)
7186 {
7187 /* Move source register to MEM, perform the branch test, then
7188 finally load the FP register from MEM from within the branch's
7189 delay slot. */
7190 output_asm_insn ("stw %1,-16(%%r30)", operands);
7191 if (length == 12)
7192 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7193 else if (length == 16)
7194 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7195 else
7196 {
7197 operands[4] = GEN_INT (length - 4);
7198 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7199 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7200 return output_lbranch (operands[3], insn, 0);
7201 }
7202 }
7203 /* Deal with gross reload from memory case. */
7204 else if (which_alternative == 2)
7205 {
7206 /* Reload loop counter from memory, the store back to memory
7207 happens in the branch's delay slot. */
7208 if (length == 8)
7209 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7210 else if (length == 12)
7211 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7212 else
7213 {
7214 operands[4] = GEN_INT (length);
7215 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7216 operands);
7217 return output_lbranch (operands[3], insn, 0);
7218 }
7219 }
7220 /* Handle SAR as a destination. */
7221 else
7222 {
7223 if (length == 8)
7224 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7225 else if (length == 12)
7226 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7227 else
7228 {
7229 operands[4] = GEN_INT (length);
7230 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7231 operands);
7232 return output_lbranch (operands[3], insn, 0);
7233 }
7234 }
7235 }
7236
7237 /* Copy any FP arguments in INSN into integer registers. */
7238 static void
7239 copy_fp_args (rtx insn)
7240 {
7241 rtx link;
7242 rtx xoperands[2];
7243
7244 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7245 {
7246 int arg_mode, regno;
7247 rtx use = XEXP (link, 0);
7248
7249 if (! (GET_CODE (use) == USE
7250 && GET_CODE (XEXP (use, 0)) == REG
7251 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7252 continue;
7253
7254 arg_mode = GET_MODE (XEXP (use, 0));
7255 regno = REGNO (XEXP (use, 0));
7256
7257 /* Is it a floating point register? */
7258 if (regno >= 32 && regno <= 39)
7259 {
7260 /* Copy the FP register into an integer register via memory. */
7261 if (arg_mode == SFmode)
7262 {
7263 xoperands[0] = XEXP (use, 0);
7264 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7265 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7266 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7267 }
7268 else
7269 {
7270 xoperands[0] = XEXP (use, 0);
7271 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7272 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7273 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7274 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7275 }
7276 }
7277 }
7278 }
7279
7280 /* Compute length of the FP argument copy sequence for INSN. */
7281 static int
7282 length_fp_args (rtx insn)
7283 {
7284 int length = 0;
7285 rtx link;
7286
7287 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7288 {
7289 int arg_mode, regno;
7290 rtx use = XEXP (link, 0);
7291
7292 if (! (GET_CODE (use) == USE
7293 && GET_CODE (XEXP (use, 0)) == REG
7294 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7295 continue;
7296
7297 arg_mode = GET_MODE (XEXP (use, 0));
7298 regno = REGNO (XEXP (use, 0));
7299
7300 /* Is it a floating point register? */
7301 if (regno >= 32 && regno <= 39)
7302 {
7303 if (arg_mode == SFmode)
7304 length += 8;
7305 else
7306 length += 12;
7307 }
7308 }
7309
7310 return length;
7311 }
7312
7313 /* Return the attribute length for the millicode call instruction INSN.
7314 The length must match the code generated by output_millicode_call.
7315 We include the delay slot in the returned length as it is better to
7316 over estimate the length than to under estimate it. */
7317
7318 int
7319 attr_length_millicode_call (rtx insn)
7320 {
7321 unsigned long distance = -1;
7322 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7323
7324 if (INSN_ADDRESSES_SET_P ())
7325 {
7326 distance = (total + insn_current_reference_address (insn));
7327 if (distance < total)
7328 distance = -1;
7329 }
7330
7331 if (TARGET_64BIT)
7332 {
7333 if (!TARGET_LONG_CALLS && distance < 7600000)
7334 return 8;
7335
7336 return 20;
7337 }
7338 else if (TARGET_PORTABLE_RUNTIME)
7339 return 24;
7340 else
7341 {
7342 if (!TARGET_LONG_CALLS && distance < 240000)
7343 return 8;
7344
7345 if (TARGET_LONG_ABS_CALL && !flag_pic)
7346 return 12;
7347
7348 return 24;
7349 }
7350 }
7351
7352 /* INSN is a function call. It may have an unconditional jump
7353 in its delay slot.
7354
7355 CALL_DEST is the routine we are calling. */
7356
7357 const char *
7358 output_millicode_call (rtx insn, rtx call_dest)
7359 {
7360 int attr_length = get_attr_length (insn);
7361 int seq_length = dbr_sequence_length ();
7362 int distance;
7363 rtx seq_insn;
7364 rtx xoperands[3];
7365
7366 xoperands[0] = call_dest;
7367 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7368
7369 /* Handle the common case where we are sure that the branch will
7370 reach the beginning of the $CODE$ subspace. The within reach
7371 form of the $$sh_func_adrs call has a length of 28. Because
7372 it has an attribute type of multi, it never has a nonzero
7373 sequence length. The length of the $$sh_func_adrs is the same
7374 as certain out of reach PIC calls to other routines. */
7375 if (!TARGET_LONG_CALLS
7376 && ((seq_length == 0
7377 && (attr_length == 12
7378 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7379 || (seq_length != 0 && attr_length == 8)))
7380 {
7381 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7382 }
7383 else
7384 {
7385 if (TARGET_64BIT)
7386 {
7387 /* It might seem that one insn could be saved by accessing
7388 the millicode function using the linkage table. However,
7389 this doesn't work in shared libraries and other dynamically
7390 loaded objects. Using a pc-relative sequence also avoids
7391 problems related to the implicit use of the gp register. */
7392 output_asm_insn ("b,l .+8,%%r1", xoperands);
7393
7394 if (TARGET_GAS)
7395 {
7396 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7397 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7398 }
7399 else
7400 {
7401 xoperands[1] = gen_label_rtx ();
7402 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7403 targetm.asm_out.internal_label (asm_out_file, "L",
7404 CODE_LABEL_NUMBER (xoperands[1]));
7405 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7406 }
7407
7408 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7409 }
7410 else if (TARGET_PORTABLE_RUNTIME)
7411 {
7412 /* Pure portable runtime doesn't allow be/ble; we also don't
7413 have PIC support in the assembler/linker, so this sequence
7414 is needed. */
7415
7416 /* Get the address of our target into %r1. */
7417 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7418 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7419
7420 /* Get our return address into %r31. */
7421 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7422 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7423
7424 /* Jump to our target address in %r1. */
7425 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7426 }
7427 else if (!flag_pic)
7428 {
7429 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7430 if (TARGET_PA_20)
7431 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7432 else
7433 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7434 }
7435 else
7436 {
7437 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7438 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7439
7440 if (TARGET_SOM || !TARGET_GAS)
7441 {
7442 /* The HP assembler can generate relocations for the
7443 difference of two symbols. GAS can do this for a
7444 millicode symbol but not an arbitrary external
7445 symbol when generating SOM output. */
7446 xoperands[1] = gen_label_rtx ();
7447 targetm.asm_out.internal_label (asm_out_file, "L",
7448 CODE_LABEL_NUMBER (xoperands[1]));
7449 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7450 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7451 }
7452 else
7453 {
7454 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7455 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7456 xoperands);
7457 }
7458
7459 /* Jump to our target address in %r1. */
7460 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7461 }
7462 }
7463
7464 if (seq_length == 0)
7465 output_asm_insn ("nop", xoperands);
7466
7467 /* We are done if there isn't a jump in the delay slot. */
7468 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7469 return "";
7470
7471 /* This call has an unconditional jump in its delay slot. */
7472 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7473
7474 /* See if the return address can be adjusted. Use the containing
7475 sequence insn's address. */
7476 if (INSN_ADDRESSES_SET_P ())
7477 {
7478 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7479 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7480 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7481
7482 if (VAL_14_BITS_P (distance))
7483 {
7484 xoperands[1] = gen_label_rtx ();
7485 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7486 targetm.asm_out.internal_label (asm_out_file, "L",
7487 CODE_LABEL_NUMBER (xoperands[1]));
7488 }
7489 else
7490 /* ??? This branch may not reach its target. */
7491 output_asm_insn ("nop\n\tb,n %0", xoperands);
7492 }
7493 else
7494 /* ??? This branch may not reach its target. */
7495 output_asm_insn ("nop\n\tb,n %0", xoperands);
7496
7497 /* Delete the jump. */
7498 SET_INSN_DELETED (NEXT_INSN (insn));
7499
7500 return "";
7501 }
7502
7503 /* Return the attribute length of the call instruction INSN. The SIBCALL
7504 flag indicates whether INSN is a regular call or a sibling call. The
7505 length returned must be longer than the code actually generated by
7506 output_call. Since branch shortening is done before delay branch
7507 sequencing, there is no way to determine whether or not the delay
7508 slot will be filled during branch shortening. Even when the delay
7509 slot is filled, we may have to add a nop if the delay slot contains
7510 a branch that can't reach its target. Thus, we always have to include
7511 the delay slot in the length estimate. This used to be done in
7512 pa_adjust_insn_length but we do it here now as some sequences always
7513 fill the delay slot and we can save four bytes in the estimate for
7514 these sequences. */
7515
7516 int
7517 attr_length_call (rtx insn, int sibcall)
7518 {
7519 int local_call;
7520 rtx call, call_dest;
7521 tree call_decl;
7522 int length = 0;
7523 rtx pat = PATTERN (insn);
7524 unsigned long distance = -1;
7525
7526 gcc_assert (GET_CODE (insn) == CALL_INSN);
7527
7528 if (INSN_ADDRESSES_SET_P ())
7529 {
7530 unsigned long total;
7531
7532 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7533 distance = (total + insn_current_reference_address (insn));
7534 if (distance < total)
7535 distance = -1;
7536 }
7537
7538 gcc_assert (GET_CODE (pat) == PARALLEL);
7539
7540 /* Get the call rtx. */
7541 call = XVECEXP (pat, 0, 0);
7542 if (GET_CODE (call) == SET)
7543 call = SET_SRC (call);
7544
7545 gcc_assert (GET_CODE (call) == CALL);
7546
7547 /* Determine if this is a local call. */
7548 call_dest = XEXP (XEXP (call, 0), 0);
7549 call_decl = SYMBOL_REF_DECL (call_dest);
7550 local_call = call_decl && targetm.binds_local_p (call_decl);
7551
7552 /* pc-relative branch. */
7553 if (!TARGET_LONG_CALLS
7554 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7555 || distance < 240000))
7556 length += 8;
7557
7558 /* 64-bit plabel sequence. */
7559 else if (TARGET_64BIT && !local_call)
7560 length += sibcall ? 28 : 24;
7561
7562 /* non-pic long absolute branch sequence. */
7563 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7564 length += 12;
7565
7566 /* long pc-relative branch sequence. */
7567 else if (TARGET_LONG_PIC_SDIFF_CALL
7568 || (TARGET_GAS && !TARGET_SOM
7569 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7570 {
7571 length += 20;
7572
7573 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7574 length += 8;
7575 }
7576
7577 /* 32-bit plabel sequence. */
7578 else
7579 {
7580 length += 32;
7581
7582 if (TARGET_SOM)
7583 length += length_fp_args (insn);
7584
7585 if (flag_pic)
7586 length += 4;
7587
7588 if (!TARGET_PA_20)
7589 {
7590 if (!sibcall)
7591 length += 8;
7592
7593 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7594 length += 8;
7595 }
7596 }
7597
7598 return length;
7599 }
7600
7601 /* INSN is a function call. It may have an unconditional jump
7602 in its delay slot.
7603
7604 CALL_DEST is the routine we are calling. */
7605
7606 const char *
7607 output_call (rtx insn, rtx call_dest, int sibcall)
7608 {
7609 int delay_insn_deleted = 0;
7610 int delay_slot_filled = 0;
7611 int seq_length = dbr_sequence_length ();
7612 tree call_decl = SYMBOL_REF_DECL (call_dest);
7613 int local_call = call_decl && targetm.binds_local_p (call_decl);
7614 rtx xoperands[2];
7615
7616 xoperands[0] = call_dest;
7617
7618 /* Handle the common case where we're sure that the branch will reach
7619 the beginning of the "$CODE$" subspace. This is the beginning of
7620 the current function if we are in a named section. */
7621 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7622 {
7623 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7624 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7625 }
7626 else
7627 {
7628 if (TARGET_64BIT && !local_call)
7629 {
7630 /* ??? As far as I can tell, the HP linker doesn't support the
7631 long pc-relative sequence described in the 64-bit runtime
7632 architecture. So, we use a slightly longer indirect call. */
7633 xoperands[0] = get_deferred_plabel (call_dest);
7634 xoperands[1] = gen_label_rtx ();
7635
7636 /* If this isn't a sibcall, we put the load of %r27 into the
7637 delay slot. We can't do this in a sibcall as we don't
7638 have a second call-clobbered scratch register available. */
7639 if (seq_length != 0
7640 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7641 && !sibcall)
7642 {
7643 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7644 optimize, 0, NULL);
7645
7646 /* Now delete the delay insn. */
7647 SET_INSN_DELETED (NEXT_INSN (insn));
7648 delay_insn_deleted = 1;
7649 }
7650
7651 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7652 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7653 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7654
7655 if (sibcall)
7656 {
7657 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7658 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7659 output_asm_insn ("bve (%%r1)", xoperands);
7660 }
7661 else
7662 {
7663 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7664 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7665 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7666 delay_slot_filled = 1;
7667 }
7668 }
7669 else
7670 {
7671 int indirect_call = 0;
7672
7673 /* Emit a long call. There are several different sequences
7674 of increasing length and complexity. In most cases,
7675 they don't allow an instruction in the delay slot. */
7676 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7677 && !TARGET_LONG_PIC_SDIFF_CALL
7678 && !(TARGET_GAS && !TARGET_SOM
7679 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7680 && !TARGET_64BIT)
7681 indirect_call = 1;
7682
7683 if (seq_length != 0
7684 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7685 && !sibcall
7686 && (!TARGET_PA_20
7687 || indirect_call
7688 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7689 {
7690 /* A non-jump insn in the delay slot. By definition we can
7691 emit this insn before the call (and in fact before argument
7692 relocating. */
7693 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7694 NULL);
7695
7696 /* Now delete the delay insn. */
7697 SET_INSN_DELETED (NEXT_INSN (insn));
7698 delay_insn_deleted = 1;
7699 }
7700
7701 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7702 {
7703 /* This is the best sequence for making long calls in
7704 non-pic code. Unfortunately, GNU ld doesn't provide
7705 the stub needed for external calls, and GAS's support
7706 for this with the SOM linker is buggy. It is safe
7707 to use this for local calls. */
7708 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7709 if (sibcall)
7710 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7711 else
7712 {
7713 if (TARGET_PA_20)
7714 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7715 xoperands);
7716 else
7717 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7718
7719 output_asm_insn ("copy %%r31,%%r2", xoperands);
7720 delay_slot_filled = 1;
7721 }
7722 }
7723 else
7724 {
7725 if (TARGET_LONG_PIC_SDIFF_CALL)
7726 {
7727 /* The HP assembler and linker can handle relocations
7728 for the difference of two symbols. The HP assembler
7729 recognizes the sequence as a pc-relative call and
7730 the linker provides stubs when needed. */
7731 xoperands[1] = gen_label_rtx ();
7732 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7733 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7734 targetm.asm_out.internal_label (asm_out_file, "L",
7735 CODE_LABEL_NUMBER (xoperands[1]));
7736 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7737 }
7738 else if (TARGET_GAS && !TARGET_SOM
7739 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7740 {
7741 /* GAS currently can't generate the relocations that
7742 are needed for the SOM linker under HP-UX using this
7743 sequence. The GNU linker doesn't generate the stubs
7744 that are needed for external calls on TARGET_ELF32
7745 with this sequence. For now, we have to use a
7746 longer plabel sequence when using GAS. */
7747 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7748 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7749 xoperands);
7750 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7751 xoperands);
7752 }
7753 else
7754 {
7755 /* Emit a long plabel-based call sequence. This is
7756 essentially an inline implementation of $$dyncall.
7757 We don't actually try to call $$dyncall as this is
7758 as difficult as calling the function itself. */
7759 xoperands[0] = get_deferred_plabel (call_dest);
7760 xoperands[1] = gen_label_rtx ();
7761
7762 /* Since the call is indirect, FP arguments in registers
7763 need to be copied to the general registers. Then, the
7764 argument relocation stub will copy them back. */
7765 if (TARGET_SOM)
7766 copy_fp_args (insn);
7767
7768 if (flag_pic)
7769 {
7770 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7771 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7772 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7773 }
7774 else
7775 {
7776 output_asm_insn ("addil LR'%0-$global$,%%r27",
7777 xoperands);
7778 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7779 xoperands);
7780 }
7781
7782 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7783 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7784 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7785 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7786
7787 if (!sibcall && !TARGET_PA_20)
7788 {
7789 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7790 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7791 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7792 else
7793 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7794 }
7795 }
7796
7797 if (TARGET_PA_20)
7798 {
7799 if (sibcall)
7800 output_asm_insn ("bve (%%r1)", xoperands);
7801 else
7802 {
7803 if (indirect_call)
7804 {
7805 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7806 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7807 delay_slot_filled = 1;
7808 }
7809 else
7810 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7811 }
7812 }
7813 else
7814 {
7815 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7816 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7817 xoperands);
7818
7819 if (sibcall)
7820 {
7821 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7822 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7823 else
7824 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7825 }
7826 else
7827 {
7828 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7829 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7830 else
7831 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7832
7833 if (indirect_call)
7834 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7835 else
7836 output_asm_insn ("copy %%r31,%%r2", xoperands);
7837 delay_slot_filled = 1;
7838 }
7839 }
7840 }
7841 }
7842 }
7843
7844 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7845 output_asm_insn ("nop", xoperands);
7846
7847 /* We are done if there isn't a jump in the delay slot. */
7848 if (seq_length == 0
7849 || delay_insn_deleted
7850 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7851 return "";
7852
7853 /* A sibcall should never have a branch in the delay slot. */
7854 gcc_assert (!sibcall);
7855
7856 /* This call has an unconditional jump in its delay slot. */
7857 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7858
7859 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7860 {
7861 /* See if the return address can be adjusted. Use the containing
7862 sequence insn's address. This would break the regular call/return@
7863 relationship assumed by the table based eh unwinder, so only do that
7864 if the call is not possibly throwing. */
7865 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7866 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7867 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7868
7869 if (VAL_14_BITS_P (distance)
7870 && !(can_throw_internal (insn) || can_throw_external (insn)))
7871 {
7872 xoperands[1] = gen_label_rtx ();
7873 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7874 targetm.asm_out.internal_label (asm_out_file, "L",
7875 CODE_LABEL_NUMBER (xoperands[1]));
7876 }
7877 else
7878 output_asm_insn ("nop\n\tb,n %0", xoperands);
7879 }
7880 else
7881 output_asm_insn ("b,n %0", xoperands);
7882
7883 /* Delete the jump. */
7884 SET_INSN_DELETED (NEXT_INSN (insn));
7885
7886 return "";
7887 }
7888
7889 /* Return the attribute length of the indirect call instruction INSN.
7890 The length must match the code generated by output_indirect call.
7891 The returned length includes the delay slot. Currently, the delay
7892 slot of an indirect call sequence is not exposed and it is used by
7893 the sequence itself. */
7894
7895 int
7896 attr_length_indirect_call (rtx insn)
7897 {
7898 unsigned long distance = -1;
7899 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7900
7901 if (INSN_ADDRESSES_SET_P ())
7902 {
7903 distance = (total + insn_current_reference_address (insn));
7904 if (distance < total)
7905 distance = -1;
7906 }
7907
7908 if (TARGET_64BIT)
7909 return 12;
7910
7911 if (TARGET_FAST_INDIRECT_CALLS
7912 || (!TARGET_PORTABLE_RUNTIME
7913 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7914 || distance < 240000)))
7915 return 8;
7916
7917 if (flag_pic)
7918 return 24;
7919
7920 if (TARGET_PORTABLE_RUNTIME)
7921 return 20;
7922
7923 /* Out of reach, can use ble. */
7924 return 12;
7925 }
7926
7927 const char *
7928 output_indirect_call (rtx insn, rtx call_dest)
7929 {
7930 rtx xoperands[1];
7931
7932 if (TARGET_64BIT)
7933 {
7934 xoperands[0] = call_dest;
7935 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7936 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7937 return "";
7938 }
7939
7940 /* First the special case for kernels, level 0 systems, etc. */
7941 if (TARGET_FAST_INDIRECT_CALLS)
7942 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7943
7944 /* Now the normal case -- we can reach $$dyncall directly or
7945 we're sure that we can get there via a long-branch stub.
7946
7947 No need to check target flags as the length uniquely identifies
7948 the remaining cases. */
7949 if (attr_length_indirect_call (insn) == 8)
7950 {
7951 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7952 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
7953 variant of the B,L instruction can't be used on the SOM target. */
7954 if (TARGET_PA_20 && !TARGET_SOM)
7955 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7956 else
7957 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7958 }
7959
7960 /* Long millicode call, but we are not generating PIC or portable runtime
7961 code. */
7962 if (attr_length_indirect_call (insn) == 12)
7963 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7964
7965 /* Long millicode call for portable runtime. */
7966 if (attr_length_indirect_call (insn) == 20)
7967 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7968
7969 /* We need a long PIC call to $$dyncall. */
7970 xoperands[0] = NULL_RTX;
7971 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7972 if (TARGET_SOM || !TARGET_GAS)
7973 {
7974 xoperands[0] = gen_label_rtx ();
7975 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7976 targetm.asm_out.internal_label (asm_out_file, "L",
7977 CODE_LABEL_NUMBER (xoperands[0]));
7978 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7979 }
7980 else
7981 {
7982 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7983 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7984 xoperands);
7985 }
7986 output_asm_insn ("blr %%r0,%%r2", xoperands);
7987 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7988 return "";
7989 }
7990
7991 /* Return the total length of the save and restore instructions needed for
7992 the data linkage table pointer (i.e., the PIC register) across the call
7993 instruction INSN. No-return calls do not require a save and restore.
7994 In addition, we may be able to avoid the save and restore for calls
7995 within the same translation unit. */
7996
7997 int
7998 attr_length_save_restore_dltp (rtx insn)
7999 {
8000 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
8001 return 0;
8002
8003 return 8;
8004 }
8005
8006 /* In HPUX 8.0's shared library scheme, special relocations are needed
8007 for function labels if they might be passed to a function
8008 in a shared library (because shared libraries don't live in code
8009 space), and special magic is needed to construct their address. */
8010
8011 void
8012 hppa_encode_label (rtx sym)
8013 {
8014 const char *str = XSTR (sym, 0);
8015 int len = strlen (str) + 1;
8016 char *newstr, *p;
8017
8018 p = newstr = XALLOCAVEC (char, len + 1);
8019 *p++ = '@';
8020 strcpy (p, str);
8021
8022 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8023 }
8024
8025 static void
8026 pa_encode_section_info (tree decl, rtx rtl, int first)
8027 {
8028 int old_referenced = 0;
8029
8030 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8031 old_referenced
8032 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8033
8034 default_encode_section_info (decl, rtl, first);
8035
8036 if (first && TEXT_SPACE_P (decl))
8037 {
8038 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8039 if (TREE_CODE (decl) == FUNCTION_DECL)
8040 hppa_encode_label (XEXP (rtl, 0));
8041 }
8042 else if (old_referenced)
8043 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8044 }
8045
8046 /* This is sort of inverse to pa_encode_section_info. */
8047
8048 static const char *
8049 pa_strip_name_encoding (const char *str)
8050 {
8051 str += (*str == '@');
8052 str += (*str == '*');
8053 return str;
8054 }
8055
8056 int
8057 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8058 {
8059 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
8060 }
8061
8062 /* Returns 1 if OP is a function label involved in a simple addition
8063 with a constant. Used to keep certain patterns from matching
8064 during instruction combination. */
8065 int
8066 is_function_label_plus_const (rtx op)
8067 {
8068 /* Strip off any CONST. */
8069 if (GET_CODE (op) == CONST)
8070 op = XEXP (op, 0);
8071
8072 return (GET_CODE (op) == PLUS
8073 && function_label_operand (XEXP (op, 0), Pmode)
8074 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8075 }
8076
8077 /* Output assembly code for a thunk to FUNCTION. */
8078
8079 static void
8080 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8081 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8082 tree function)
8083 {
8084 static unsigned int current_thunk_number;
8085 int val_14 = VAL_14_BITS_P (delta);
8086 unsigned int old_last_address = last_address, nbytes = 0;
8087 char label[16];
8088 rtx xoperands[4];
8089
8090 xoperands[0] = XEXP (DECL_RTL (function), 0);
8091 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8092 xoperands[2] = GEN_INT (delta);
8093
8094 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8095 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8096
8097 /* Output the thunk. We know that the function is in the same
8098 translation unit (i.e., the same space) as the thunk, and that
8099 thunks are output after their method. Thus, we don't need an
8100 external branch to reach the function. With SOM and GAS,
8101 functions and thunks are effectively in different sections.
8102 Thus, we can always use a IA-relative branch and the linker
8103 will add a long branch stub if necessary.
8104
8105 However, we have to be careful when generating PIC code on the
8106 SOM port to ensure that the sequence does not transfer to an
8107 import stub for the target function as this could clobber the
8108 return value saved at SP-24. This would also apply to the
8109 32-bit linux port if the multi-space model is implemented. */
8110 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8111 && !(flag_pic && TREE_PUBLIC (function))
8112 && (TARGET_GAS || last_address < 262132))
8113 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8114 && ((targetm.have_named_sections
8115 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8116 /* The GNU 64-bit linker has rather poor stub management.
8117 So, we use a long branch from thunks that aren't in
8118 the same section as the target function. */
8119 && ((!TARGET_64BIT
8120 && (DECL_SECTION_NAME (thunk_fndecl)
8121 != DECL_SECTION_NAME (function)))
8122 || ((DECL_SECTION_NAME (thunk_fndecl)
8123 == DECL_SECTION_NAME (function))
8124 && last_address < 262132)))
8125 || (targetm.have_named_sections
8126 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8127 && DECL_SECTION_NAME (function) == NULL
8128 && last_address < 262132)
8129 || (!targetm.have_named_sections && last_address < 262132))))
8130 {
8131 if (!val_14)
8132 output_asm_insn ("addil L'%2,%%r26", xoperands);
8133
8134 output_asm_insn ("b %0", xoperands);
8135
8136 if (val_14)
8137 {
8138 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8139 nbytes += 8;
8140 }
8141 else
8142 {
8143 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8144 nbytes += 12;
8145 }
8146 }
8147 else if (TARGET_64BIT)
8148 {
8149 /* We only have one call-clobbered scratch register, so we can't
8150 make use of the delay slot if delta doesn't fit in 14 bits. */
8151 if (!val_14)
8152 {
8153 output_asm_insn ("addil L'%2,%%r26", xoperands);
8154 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8155 }
8156
8157 output_asm_insn ("b,l .+8,%%r1", xoperands);
8158
8159 if (TARGET_GAS)
8160 {
8161 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8162 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8163 }
8164 else
8165 {
8166 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8167 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8168 }
8169
8170 if (val_14)
8171 {
8172 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8173 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8174 nbytes += 20;
8175 }
8176 else
8177 {
8178 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8179 nbytes += 24;
8180 }
8181 }
8182 else if (TARGET_PORTABLE_RUNTIME)
8183 {
8184 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8185 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8186
8187 if (!val_14)
8188 output_asm_insn ("addil L'%2,%%r26", xoperands);
8189
8190 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8191
8192 if (val_14)
8193 {
8194 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8195 nbytes += 16;
8196 }
8197 else
8198 {
8199 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8200 nbytes += 20;
8201 }
8202 }
8203 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8204 {
8205 /* The function is accessible from outside this module. The only
8206 way to avoid an import stub between the thunk and function is to
8207 call the function directly with an indirect sequence similar to
8208 that used by $$dyncall. This is possible because $$dyncall acts
8209 as the import stub in an indirect call. */
8210 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8211 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8212 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8213 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8214 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8215 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8216 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8217 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8218 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8219
8220 if (!val_14)
8221 {
8222 output_asm_insn ("addil L'%2,%%r26", xoperands);
8223 nbytes += 4;
8224 }
8225
8226 if (TARGET_PA_20)
8227 {
8228 output_asm_insn ("bve (%%r22)", xoperands);
8229 nbytes += 36;
8230 }
8231 else if (TARGET_NO_SPACE_REGS)
8232 {
8233 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8234 nbytes += 36;
8235 }
8236 else
8237 {
8238 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8239 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8240 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8241 nbytes += 44;
8242 }
8243
8244 if (val_14)
8245 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8246 else
8247 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8248 }
8249 else if (flag_pic)
8250 {
8251 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8252
8253 if (TARGET_SOM || !TARGET_GAS)
8254 {
8255 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8256 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8257 }
8258 else
8259 {
8260 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8261 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8262 }
8263
8264 if (!val_14)
8265 output_asm_insn ("addil L'%2,%%r26", xoperands);
8266
8267 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8268
8269 if (val_14)
8270 {
8271 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8272 nbytes += 20;
8273 }
8274 else
8275 {
8276 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8277 nbytes += 24;
8278 }
8279 }
8280 else
8281 {
8282 if (!val_14)
8283 output_asm_insn ("addil L'%2,%%r26", xoperands);
8284
8285 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8286 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8287
8288 if (val_14)
8289 {
8290 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8291 nbytes += 12;
8292 }
8293 else
8294 {
8295 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8296 nbytes += 16;
8297 }
8298 }
8299
8300 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8301
8302 if (TARGET_SOM && TARGET_GAS)
8303 {
8304 /* We done with this subspace except possibly for some additional
8305 debug information. Forget that we are in this subspace to ensure
8306 that the next function is output in its own subspace. */
8307 in_section = NULL;
8308 cfun->machine->in_nsubspa = 2;
8309 }
8310
8311 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8312 {
8313 switch_to_section (data_section);
8314 output_asm_insn (".align 4", xoperands);
8315 ASM_OUTPUT_LABEL (file, label);
8316 output_asm_insn (".word P'%0", xoperands);
8317 }
8318
8319 current_thunk_number++;
8320 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8321 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8322 last_address += nbytes;
8323 if (old_last_address > last_address)
8324 last_address = UINT_MAX;
8325 update_total_code_bytes (nbytes);
8326 }
8327
8328 /* Only direct calls to static functions are allowed to be sibling (tail)
8329 call optimized.
8330
8331 This restriction is necessary because some linker generated stubs will
8332 store return pointers into rp' in some cases which might clobber a
8333 live value already in rp'.
8334
8335 In a sibcall the current function and the target function share stack
8336 space. Thus if the path to the current function and the path to the
8337 target function save a value in rp', they save the value into the
8338 same stack slot, which has undesirable consequences.
8339
8340 Because of the deferred binding nature of shared libraries any function
8341 with external scope could be in a different load module and thus require
8342 rp' to be saved when calling that function. So sibcall optimizations
8343 can only be safe for static function.
8344
8345 Note that GCC never needs return value relocations, so we don't have to
8346 worry about static calls with return value relocations (which require
8347 saving rp').
8348
8349 It is safe to perform a sibcall optimization when the target function
8350 will never return. */
8351 static bool
8352 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8353 {
8354 if (TARGET_PORTABLE_RUNTIME)
8355 return false;
8356
8357 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
8358 single subspace mode and the call is not indirect. As far as I know,
8359 there is no operating system support for the multiple subspace mode.
8360 It might be possible to support indirect calls if we didn't use
8361 $$dyncall (see the indirect sequence generated in output_call). */
8362 if (TARGET_ELF32)
8363 return (decl != NULL_TREE);
8364
8365 /* Sibcalls are not ok because the arg pointer register is not a fixed
8366 register. This prevents the sibcall optimization from occurring. In
8367 addition, there are problems with stub placement using GNU ld. This
8368 is because a normal sibcall branch uses a 17-bit relocation while
8369 a regular call branch uses a 22-bit relocation. As a result, more
8370 care needs to be taken in the placement of long-branch stubs. */
8371 if (TARGET_64BIT)
8372 return false;
8373
8374 /* Sibcalls are only ok within a translation unit. */
8375 return (decl && !TREE_PUBLIC (decl));
8376 }
8377
8378 /* ??? Addition is not commutative on the PA due to the weird implicit
8379 space register selection rules for memory addresses. Therefore, we
8380 don't consider a + b == b + a, as this might be inside a MEM. */
8381 static bool
8382 pa_commutative_p (const_rtx x, int outer_code)
8383 {
8384 return (COMMUTATIVE_P (x)
8385 && (TARGET_NO_SPACE_REGS
8386 || (outer_code != UNKNOWN && outer_code != MEM)
8387 || GET_CODE (x) != PLUS));
8388 }
8389
8390 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8391 use in fmpyadd instructions. */
8392 int
8393 fmpyaddoperands (rtx *operands)
8394 {
8395 enum machine_mode mode = GET_MODE (operands[0]);
8396
8397 /* Must be a floating point mode. */
8398 if (mode != SFmode && mode != DFmode)
8399 return 0;
8400
8401 /* All modes must be the same. */
8402 if (! (mode == GET_MODE (operands[1])
8403 && mode == GET_MODE (operands[2])
8404 && mode == GET_MODE (operands[3])
8405 && mode == GET_MODE (operands[4])
8406 && mode == GET_MODE (operands[5])))
8407 return 0;
8408
8409 /* All operands must be registers. */
8410 if (! (GET_CODE (operands[1]) == REG
8411 && GET_CODE (operands[2]) == REG
8412 && GET_CODE (operands[3]) == REG
8413 && GET_CODE (operands[4]) == REG
8414 && GET_CODE (operands[5]) == REG))
8415 return 0;
8416
8417 /* Only 2 real operands to the addition. One of the input operands must
8418 be the same as the output operand. */
8419 if (! rtx_equal_p (operands[3], operands[4])
8420 && ! rtx_equal_p (operands[3], operands[5]))
8421 return 0;
8422
8423 /* Inout operand of add cannot conflict with any operands from multiply. */
8424 if (rtx_equal_p (operands[3], operands[0])
8425 || rtx_equal_p (operands[3], operands[1])
8426 || rtx_equal_p (operands[3], operands[2]))
8427 return 0;
8428
8429 /* multiply cannot feed into addition operands. */
8430 if (rtx_equal_p (operands[4], operands[0])
8431 || rtx_equal_p (operands[5], operands[0]))
8432 return 0;
8433
8434 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8435 if (mode == SFmode
8436 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8437 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8438 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8439 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8440 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8441 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8442 return 0;
8443
8444 /* Passed. Operands are suitable for fmpyadd. */
8445 return 1;
8446 }
8447
8448 #if !defined(USE_COLLECT2)
8449 static void
8450 pa_asm_out_constructor (rtx symbol, int priority)
8451 {
8452 if (!function_label_operand (symbol, VOIDmode))
8453 hppa_encode_label (symbol);
8454
8455 #ifdef CTORS_SECTION_ASM_OP
8456 default_ctor_section_asm_out_constructor (symbol, priority);
8457 #else
8458 # ifdef TARGET_ASM_NAMED_SECTION
8459 default_named_section_asm_out_constructor (symbol, priority);
8460 # else
8461 default_stabs_asm_out_constructor (symbol, priority);
8462 # endif
8463 #endif
8464 }
8465
8466 static void
8467 pa_asm_out_destructor (rtx symbol, int priority)
8468 {
8469 if (!function_label_operand (symbol, VOIDmode))
8470 hppa_encode_label (symbol);
8471
8472 #ifdef DTORS_SECTION_ASM_OP
8473 default_dtor_section_asm_out_destructor (symbol, priority);
8474 #else
8475 # ifdef TARGET_ASM_NAMED_SECTION
8476 default_named_section_asm_out_destructor (symbol, priority);
8477 # else
8478 default_stabs_asm_out_destructor (symbol, priority);
8479 # endif
8480 #endif
8481 }
8482 #endif
8483
8484 /* This function places uninitialized global data in the bss section.
8485 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8486 function on the SOM port to prevent uninitialized global data from
8487 being placed in the data section. */
8488
8489 void
8490 pa_asm_output_aligned_bss (FILE *stream,
8491 const char *name,
8492 unsigned HOST_WIDE_INT size,
8493 unsigned int align)
8494 {
8495 switch_to_section (bss_section);
8496 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8497
8498 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8499 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8500 #endif
8501
8502 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8503 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8504 #endif
8505
8506 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8507 ASM_OUTPUT_LABEL (stream, name);
8508 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8509 }
8510
8511 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8512 that doesn't allow the alignment of global common storage to be directly
8513 specified. The SOM linker aligns common storage based on the rounded
8514 value of the NUM_BYTES parameter in the .comm directive. It's not
8515 possible to use the .align directive as it doesn't affect the alignment
8516 of the label associated with a .comm directive. */
8517
8518 void
8519 pa_asm_output_aligned_common (FILE *stream,
8520 const char *name,
8521 unsigned HOST_WIDE_INT size,
8522 unsigned int align)
8523 {
8524 unsigned int max_common_align;
8525
8526 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8527 if (align > max_common_align)
8528 {
8529 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8530 "for global common data. Using %u",
8531 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8532 align = max_common_align;
8533 }
8534
8535 switch_to_section (bss_section);
8536
8537 assemble_name (stream, name);
8538 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8539 MAX (size, align / BITS_PER_UNIT));
8540 }
8541
8542 /* We can't use .comm for local common storage as the SOM linker effectively
8543 treats the symbol as universal and uses the same storage for local symbols
8544 with the same name in different object files. The .block directive
8545 reserves an uninitialized block of storage. However, it's not common
8546 storage. Fortunately, GCC never requests common storage with the same
8547 name in any given translation unit. */
8548
8549 void
8550 pa_asm_output_aligned_local (FILE *stream,
8551 const char *name,
8552 unsigned HOST_WIDE_INT size,
8553 unsigned int align)
8554 {
8555 switch_to_section (bss_section);
8556 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8557
8558 #ifdef LOCAL_ASM_OP
8559 fprintf (stream, "%s", LOCAL_ASM_OP);
8560 assemble_name (stream, name);
8561 fprintf (stream, "\n");
8562 #endif
8563
8564 ASM_OUTPUT_LABEL (stream, name);
8565 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8566 }
8567
8568 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8569 use in fmpysub instructions. */
8570 int
8571 fmpysuboperands (rtx *operands)
8572 {
8573 enum machine_mode mode = GET_MODE (operands[0]);
8574
8575 /* Must be a floating point mode. */
8576 if (mode != SFmode && mode != DFmode)
8577 return 0;
8578
8579 /* All modes must be the same. */
8580 if (! (mode == GET_MODE (operands[1])
8581 && mode == GET_MODE (operands[2])
8582 && mode == GET_MODE (operands[3])
8583 && mode == GET_MODE (operands[4])
8584 && mode == GET_MODE (operands[5])))
8585 return 0;
8586
8587 /* All operands must be registers. */
8588 if (! (GET_CODE (operands[1]) == REG
8589 && GET_CODE (operands[2]) == REG
8590 && GET_CODE (operands[3]) == REG
8591 && GET_CODE (operands[4]) == REG
8592 && GET_CODE (operands[5]) == REG))
8593 return 0;
8594
8595 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8596 operation, so operands[4] must be the same as operand[3]. */
8597 if (! rtx_equal_p (operands[3], operands[4]))
8598 return 0;
8599
8600 /* multiply cannot feed into subtraction. */
8601 if (rtx_equal_p (operands[5], operands[0]))
8602 return 0;
8603
8604 /* Inout operand of sub cannot conflict with any operands from multiply. */
8605 if (rtx_equal_p (operands[3], operands[0])
8606 || rtx_equal_p (operands[3], operands[1])
8607 || rtx_equal_p (operands[3], operands[2]))
8608 return 0;
8609
8610 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8611 if (mode == SFmode
8612 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8613 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8614 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8615 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8616 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8617 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8618 return 0;
8619
8620 /* Passed. Operands are suitable for fmpysub. */
8621 return 1;
8622 }
8623
8624 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8625 constants for shadd instructions. */
8626 int
8627 shadd_constant_p (int val)
8628 {
8629 if (val == 2 || val == 4 || val == 8)
8630 return 1;
8631 else
8632 return 0;
8633 }
8634
8635 /* Return 1 if OP is valid as a base or index register in a
8636 REG+REG address. */
8637
8638 int
8639 borx_reg_operand (rtx op, enum machine_mode mode)
8640 {
8641 if (GET_CODE (op) != REG)
8642 return 0;
8643
8644 /* We must reject virtual registers as the only expressions that
8645 can be instantiated are REG and REG+CONST. */
8646 if (op == virtual_incoming_args_rtx
8647 || op == virtual_stack_vars_rtx
8648 || op == virtual_stack_dynamic_rtx
8649 || op == virtual_outgoing_args_rtx
8650 || op == virtual_cfa_rtx)
8651 return 0;
8652
8653 /* While it's always safe to index off the frame pointer, it's not
8654 profitable to do so when the frame pointer is being eliminated. */
8655 if (!reload_completed
8656 && flag_omit_frame_pointer
8657 && !cfun->calls_alloca
8658 && op == frame_pointer_rtx)
8659 return 0;
8660
8661 return register_operand (op, mode);
8662 }
8663
8664 /* Return 1 if this operand is anything other than a hard register. */
8665
8666 int
8667 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8668 {
8669 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8670 }
8671
8672 /* Return TRUE if INSN branches forward. */
8673
8674 static bool
8675 forward_branch_p (rtx insn)
8676 {
8677 rtx lab = JUMP_LABEL (insn);
8678
8679 /* The INSN must have a jump label. */
8680 gcc_assert (lab != NULL_RTX);
8681
8682 if (INSN_ADDRESSES_SET_P ())
8683 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8684
8685 while (insn)
8686 {
8687 if (insn == lab)
8688 return true;
8689 else
8690 insn = NEXT_INSN (insn);
8691 }
8692
8693 return false;
8694 }
8695
8696 /* Return 1 if OP is an equality comparison, else return 0. */
8697 int
8698 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8699 {
8700 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8701 }
8702
8703 /* Return 1 if INSN is in the delay slot of a call instruction. */
8704 int
8705 jump_in_call_delay (rtx insn)
8706 {
8707
8708 if (GET_CODE (insn) != JUMP_INSN)
8709 return 0;
8710
8711 if (PREV_INSN (insn)
8712 && PREV_INSN (PREV_INSN (insn))
8713 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8714 {
8715 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8716
8717 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8718 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8719
8720 }
8721 else
8722 return 0;
8723 }
8724
8725 /* Output an unconditional move and branch insn. */
8726
8727 const char *
8728 output_parallel_movb (rtx *operands, rtx insn)
8729 {
8730 int length = get_attr_length (insn);
8731
8732 /* These are the cases in which we win. */
8733 if (length == 4)
8734 return "mov%I1b,tr %1,%0,%2";
8735
8736 /* None of the following cases win, but they don't lose either. */
8737 if (length == 8)
8738 {
8739 if (dbr_sequence_length () == 0)
8740 {
8741 /* Nothing in the delay slot, fake it by putting the combined
8742 insn (the copy or add) in the delay slot of a bl. */
8743 if (GET_CODE (operands[1]) == CONST_INT)
8744 return "b %2\n\tldi %1,%0";
8745 else
8746 return "b %2\n\tcopy %1,%0";
8747 }
8748 else
8749 {
8750 /* Something in the delay slot, but we've got a long branch. */
8751 if (GET_CODE (operands[1]) == CONST_INT)
8752 return "ldi %1,%0\n\tb %2";
8753 else
8754 return "copy %1,%0\n\tb %2";
8755 }
8756 }
8757
8758 if (GET_CODE (operands[1]) == CONST_INT)
8759 output_asm_insn ("ldi %1,%0", operands);
8760 else
8761 output_asm_insn ("copy %1,%0", operands);
8762 return output_lbranch (operands[2], insn, 1);
8763 }
8764
8765 /* Output an unconditional add and branch insn. */
8766
8767 const char *
8768 output_parallel_addb (rtx *operands, rtx insn)
8769 {
8770 int length = get_attr_length (insn);
8771
8772 /* To make life easy we want operand0 to be the shared input/output
8773 operand and operand1 to be the readonly operand. */
8774 if (operands[0] == operands[1])
8775 operands[1] = operands[2];
8776
8777 /* These are the cases in which we win. */
8778 if (length == 4)
8779 return "add%I1b,tr %1,%0,%3";
8780
8781 /* None of the following cases win, but they don't lose either. */
8782 if (length == 8)
8783 {
8784 if (dbr_sequence_length () == 0)
8785 /* Nothing in the delay slot, fake it by putting the combined
8786 insn (the copy or add) in the delay slot of a bl. */
8787 return "b %3\n\tadd%I1 %1,%0,%0";
8788 else
8789 /* Something in the delay slot, but we've got a long branch. */
8790 return "add%I1 %1,%0,%0\n\tb %3";
8791 }
8792
8793 output_asm_insn ("add%I1 %1,%0,%0", operands);
8794 return output_lbranch (operands[3], insn, 1);
8795 }
8796
8797 /* Return nonzero if INSN (a jump insn) immediately follows a call
8798 to a named function. This is used to avoid filling the delay slot
8799 of the jump since it can usually be eliminated by modifying RP in
8800 the delay slot of the call. */
8801
8802 int
8803 following_call (rtx insn)
8804 {
8805 if (! TARGET_JUMP_IN_DELAY)
8806 return 0;
8807
8808 /* Find the previous real insn, skipping NOTEs. */
8809 insn = PREV_INSN (insn);
8810 while (insn && GET_CODE (insn) == NOTE)
8811 insn = PREV_INSN (insn);
8812
8813 /* Check for CALL_INSNs and millicode calls. */
8814 if (insn
8815 && ((GET_CODE (insn) == CALL_INSN
8816 && get_attr_type (insn) != TYPE_DYNCALL)
8817 || (GET_CODE (insn) == INSN
8818 && GET_CODE (PATTERN (insn)) != SEQUENCE
8819 && GET_CODE (PATTERN (insn)) != USE
8820 && GET_CODE (PATTERN (insn)) != CLOBBER
8821 && get_attr_type (insn) == TYPE_MILLI)))
8822 return 1;
8823
8824 return 0;
8825 }
8826
8827 /* We use this hook to perform a PA specific optimization which is difficult
8828 to do in earlier passes.
8829
8830 We want the delay slots of branches within jump tables to be filled.
8831 None of the compiler passes at the moment even has the notion that a
8832 PA jump table doesn't contain addresses, but instead contains actual
8833 instructions!
8834
8835 Because we actually jump into the table, the addresses of each entry
8836 must stay constant in relation to the beginning of the table (which
8837 itself must stay constant relative to the instruction to jump into
8838 it). I don't believe we can guarantee earlier passes of the compiler
8839 will adhere to those rules.
8840
8841 So, late in the compilation process we find all the jump tables, and
8842 expand them into real code -- e.g. each entry in the jump table vector
8843 will get an appropriate label followed by a jump to the final target.
8844
8845 Reorg and the final jump pass can then optimize these branches and
8846 fill their delay slots. We end up with smaller, more efficient code.
8847
8848 The jump instructions within the table are special; we must be able
8849 to identify them during assembly output (if the jumps don't get filled
8850 we need to emit a nop rather than nullifying the delay slot)). We
8851 identify jumps in switch tables by using insns with the attribute
8852 type TYPE_BTABLE_BRANCH.
8853
8854 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8855 insns. This serves two purposes, first it prevents jump.c from
8856 noticing that the last N entries in the table jump to the instruction
8857 immediately after the table and deleting the jumps. Second, those
8858 insns mark where we should emit .begin_brtab and .end_brtab directives
8859 when using GAS (allows for better link time optimizations). */
8860
8861 static void
8862 pa_reorg (void)
8863 {
8864 rtx insn;
8865
8866 remove_useless_addtr_insns (1);
8867
8868 if (pa_cpu < PROCESSOR_8000)
8869 pa_combine_instructions ();
8870
8871
8872 /* This is fairly cheap, so always run it if optimizing. */
8873 if (optimize > 0 && !TARGET_BIG_SWITCH)
8874 {
8875 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8876 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8877 {
8878 rtx pattern, tmp, location, label;
8879 unsigned int length, i;
8880
8881 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8882 if (GET_CODE (insn) != JUMP_INSN
8883 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8884 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8885 continue;
8886
8887 /* Emit marker for the beginning of the branch table. */
8888 emit_insn_before (gen_begin_brtab (), insn);
8889
8890 pattern = PATTERN (insn);
8891 location = PREV_INSN (insn);
8892 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8893
8894 for (i = 0; i < length; i++)
8895 {
8896 /* Emit a label before each jump to keep jump.c from
8897 removing this code. */
8898 tmp = gen_label_rtx ();
8899 LABEL_NUSES (tmp) = 1;
8900 emit_label_after (tmp, location);
8901 location = NEXT_INSN (location);
8902
8903 if (GET_CODE (pattern) == ADDR_VEC)
8904 label = XEXP (XVECEXP (pattern, 0, i), 0);
8905 else
8906 label = XEXP (XVECEXP (pattern, 1, i), 0);
8907
8908 tmp = gen_short_jump (label);
8909
8910 /* Emit the jump itself. */
8911 tmp = emit_jump_insn_after (tmp, location);
8912 JUMP_LABEL (tmp) = label;
8913 LABEL_NUSES (label)++;
8914 location = NEXT_INSN (location);
8915
8916 /* Emit a BARRIER after the jump. */
8917 emit_barrier_after (location);
8918 location = NEXT_INSN (location);
8919 }
8920
8921 /* Emit marker for the end of the branch table. */
8922 emit_insn_before (gen_end_brtab (), location);
8923 location = NEXT_INSN (location);
8924 emit_barrier_after (location);
8925
8926 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8927 delete_insn (insn);
8928 }
8929 }
8930 else
8931 {
8932 /* Still need brtab marker insns. FIXME: the presence of these
8933 markers disables output of the branch table to readonly memory,
8934 and any alignment directives that might be needed. Possibly,
8935 the begin_brtab insn should be output before the label for the
8936 table. This doesn't matter at the moment since the tables are
8937 always output in the text section. */
8938 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8939 {
8940 /* Find an ADDR_VEC insn. */
8941 if (GET_CODE (insn) != JUMP_INSN
8942 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8943 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8944 continue;
8945
8946 /* Now generate markers for the beginning and end of the
8947 branch table. */
8948 emit_insn_before (gen_begin_brtab (), insn);
8949 emit_insn_after (gen_end_brtab (), insn);
8950 }
8951 }
8952 }
8953
8954 /* The PA has a number of odd instructions which can perform multiple
8955 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8956 it may be profitable to combine two instructions into one instruction
8957 with two outputs. It's not profitable PA2.0 machines because the
8958 two outputs would take two slots in the reorder buffers.
8959
8960 This routine finds instructions which can be combined and combines
8961 them. We only support some of the potential combinations, and we
8962 only try common ways to find suitable instructions.
8963
8964 * addb can add two registers or a register and a small integer
8965 and jump to a nearby (+-8k) location. Normally the jump to the
8966 nearby location is conditional on the result of the add, but by
8967 using the "true" condition we can make the jump unconditional.
8968 Thus addb can perform two independent operations in one insn.
8969
8970 * movb is similar to addb in that it can perform a reg->reg
8971 or small immediate->reg copy and jump to a nearby (+-8k location).
8972
8973 * fmpyadd and fmpysub can perform a FP multiply and either an
8974 FP add or FP sub if the operands of the multiply and add/sub are
8975 independent (there are other minor restrictions). Note both
8976 the fmpy and fadd/fsub can in theory move to better spots according
8977 to data dependencies, but for now we require the fmpy stay at a
8978 fixed location.
8979
8980 * Many of the memory operations can perform pre & post updates
8981 of index registers. GCC's pre/post increment/decrement addressing
8982 is far too simple to take advantage of all the possibilities. This
8983 pass may not be suitable since those insns may not be independent.
8984
8985 * comclr can compare two ints or an int and a register, nullify
8986 the following instruction and zero some other register. This
8987 is more difficult to use as it's harder to find an insn which
8988 will generate a comclr than finding something like an unconditional
8989 branch. (conditional moves & long branches create comclr insns).
8990
8991 * Most arithmetic operations can conditionally skip the next
8992 instruction. They can be viewed as "perform this operation
8993 and conditionally jump to this nearby location" (where nearby
8994 is an insns away). These are difficult to use due to the
8995 branch length restrictions. */
8996
8997 static void
8998 pa_combine_instructions (void)
8999 {
9000 rtx anchor, new_rtx;
9001
9002 /* This can get expensive since the basic algorithm is on the
9003 order of O(n^2) (or worse). Only do it for -O2 or higher
9004 levels of optimization. */
9005 if (optimize < 2)
9006 return;
9007
9008 /* Walk down the list of insns looking for "anchor" insns which
9009 may be combined with "floating" insns. As the name implies,
9010 "anchor" instructions don't move, while "floating" insns may
9011 move around. */
9012 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9013 new_rtx = make_insn_raw (new_rtx);
9014
9015 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9016 {
9017 enum attr_pa_combine_type anchor_attr;
9018 enum attr_pa_combine_type floater_attr;
9019
9020 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9021 Also ignore any special USE insns. */
9022 if ((GET_CODE (anchor) != INSN
9023 && GET_CODE (anchor) != JUMP_INSN
9024 && GET_CODE (anchor) != CALL_INSN)
9025 || GET_CODE (PATTERN (anchor)) == USE
9026 || GET_CODE (PATTERN (anchor)) == CLOBBER
9027 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
9028 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
9029 continue;
9030
9031 anchor_attr = get_attr_pa_combine_type (anchor);
9032 /* See if anchor is an insn suitable for combination. */
9033 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9034 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9035 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9036 && ! forward_branch_p (anchor)))
9037 {
9038 rtx floater;
9039
9040 for (floater = PREV_INSN (anchor);
9041 floater;
9042 floater = PREV_INSN (floater))
9043 {
9044 if (GET_CODE (floater) == NOTE
9045 || (GET_CODE (floater) == INSN
9046 && (GET_CODE (PATTERN (floater)) == USE
9047 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9048 continue;
9049
9050 /* Anything except a regular INSN will stop our search. */
9051 if (GET_CODE (floater) != INSN
9052 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9053 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9054 {
9055 floater = NULL_RTX;
9056 break;
9057 }
9058
9059 /* See if FLOATER is suitable for combination with the
9060 anchor. */
9061 floater_attr = get_attr_pa_combine_type (floater);
9062 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9063 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9064 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9065 && floater_attr == PA_COMBINE_TYPE_FMPY))
9066 {
9067 /* If ANCHOR and FLOATER can be combined, then we're
9068 done with this pass. */
9069 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9070 SET_DEST (PATTERN (floater)),
9071 XEXP (SET_SRC (PATTERN (floater)), 0),
9072 XEXP (SET_SRC (PATTERN (floater)), 1)))
9073 break;
9074 }
9075
9076 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9077 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9078 {
9079 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9080 {
9081 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9082 SET_DEST (PATTERN (floater)),
9083 XEXP (SET_SRC (PATTERN (floater)), 0),
9084 XEXP (SET_SRC (PATTERN (floater)), 1)))
9085 break;
9086 }
9087 else
9088 {
9089 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9090 SET_DEST (PATTERN (floater)),
9091 SET_SRC (PATTERN (floater)),
9092 SET_SRC (PATTERN (floater))))
9093 break;
9094 }
9095 }
9096 }
9097
9098 /* If we didn't find anything on the backwards scan try forwards. */
9099 if (!floater
9100 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9101 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9102 {
9103 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9104 {
9105 if (GET_CODE (floater) == NOTE
9106 || (GET_CODE (floater) == INSN
9107 && (GET_CODE (PATTERN (floater)) == USE
9108 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9109
9110 continue;
9111
9112 /* Anything except a regular INSN will stop our search. */
9113 if (GET_CODE (floater) != INSN
9114 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9115 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9116 {
9117 floater = NULL_RTX;
9118 break;
9119 }
9120
9121 /* See if FLOATER is suitable for combination with the
9122 anchor. */
9123 floater_attr = get_attr_pa_combine_type (floater);
9124 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9125 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9126 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9127 && floater_attr == PA_COMBINE_TYPE_FMPY))
9128 {
9129 /* If ANCHOR and FLOATER can be combined, then we're
9130 done with this pass. */
9131 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9132 SET_DEST (PATTERN (floater)),
9133 XEXP (SET_SRC (PATTERN (floater)),
9134 0),
9135 XEXP (SET_SRC (PATTERN (floater)),
9136 1)))
9137 break;
9138 }
9139 }
9140 }
9141
9142 /* FLOATER will be nonzero if we found a suitable floating
9143 insn for combination with ANCHOR. */
9144 if (floater
9145 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9146 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9147 {
9148 /* Emit the new instruction and delete the old anchor. */
9149 emit_insn_before (gen_rtx_PARALLEL
9150 (VOIDmode,
9151 gen_rtvec (2, PATTERN (anchor),
9152 PATTERN (floater))),
9153 anchor);
9154
9155 SET_INSN_DELETED (anchor);
9156
9157 /* Emit a special USE insn for FLOATER, then delete
9158 the floating insn. */
9159 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9160 delete_insn (floater);
9161
9162 continue;
9163 }
9164 else if (floater
9165 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9166 {
9167 rtx temp;
9168 /* Emit the new_jump instruction and delete the old anchor. */
9169 temp
9170 = emit_jump_insn_before (gen_rtx_PARALLEL
9171 (VOIDmode,
9172 gen_rtvec (2, PATTERN (anchor),
9173 PATTERN (floater))),
9174 anchor);
9175
9176 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9177 SET_INSN_DELETED (anchor);
9178
9179 /* Emit a special USE insn for FLOATER, then delete
9180 the floating insn. */
9181 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9182 delete_insn (floater);
9183 continue;
9184 }
9185 }
9186 }
9187 }
9188
9189 static int
9190 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9191 rtx src1, rtx src2)
9192 {
9193 int insn_code_number;
9194 rtx start, end;
9195
9196 /* Create a PARALLEL with the patterns of ANCHOR and
9197 FLOATER, try to recognize it, then test constraints
9198 for the resulting pattern.
9199
9200 If the pattern doesn't match or the constraints
9201 aren't met keep searching for a suitable floater
9202 insn. */
9203 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9204 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9205 INSN_CODE (new_rtx) = -1;
9206 insn_code_number = recog_memoized (new_rtx);
9207 if (insn_code_number < 0
9208 || (extract_insn (new_rtx), ! constrain_operands (1)))
9209 return 0;
9210
9211 if (reversed)
9212 {
9213 start = anchor;
9214 end = floater;
9215 }
9216 else
9217 {
9218 start = floater;
9219 end = anchor;
9220 }
9221
9222 /* There's up to three operands to consider. One
9223 output and two inputs.
9224
9225 The output must not be used between FLOATER & ANCHOR
9226 exclusive. The inputs must not be set between
9227 FLOATER and ANCHOR exclusive. */
9228
9229 if (reg_used_between_p (dest, start, end))
9230 return 0;
9231
9232 if (reg_set_between_p (src1, start, end))
9233 return 0;
9234
9235 if (reg_set_between_p (src2, start, end))
9236 return 0;
9237
9238 /* If we get here, then everything is good. */
9239 return 1;
9240 }
9241
9242 /* Return nonzero if references for INSN are delayed.
9243
9244 Millicode insns are actually function calls with some special
9245 constraints on arguments and register usage.
9246
9247 Millicode calls always expect their arguments in the integer argument
9248 registers, and always return their result in %r29 (ret1). They
9249 are expected to clobber their arguments, %r1, %r29, and the return
9250 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9251
9252 This function tells reorg that the references to arguments and
9253 millicode calls do not appear to happen until after the millicode call.
9254 This allows reorg to put insns which set the argument registers into the
9255 delay slot of the millicode call -- thus they act more like traditional
9256 CALL_INSNs.
9257
9258 Note we cannot consider side effects of the insn to be delayed because
9259 the branch and link insn will clobber the return pointer. If we happened
9260 to use the return pointer in the delay slot of the call, then we lose.
9261
9262 get_attr_type will try to recognize the given insn, so make sure to
9263 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9264 in particular. */
9265 int
9266 insn_refs_are_delayed (rtx insn)
9267 {
9268 return ((GET_CODE (insn) == INSN
9269 && GET_CODE (PATTERN (insn)) != SEQUENCE
9270 && GET_CODE (PATTERN (insn)) != USE
9271 && GET_CODE (PATTERN (insn)) != CLOBBER
9272 && get_attr_type (insn) == TYPE_MILLI));
9273 }
9274
9275 /* Promote the return value, but not the arguments. */
9276
9277 static enum machine_mode
9278 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9279 enum machine_mode mode,
9280 int *punsignedp ATTRIBUTE_UNUSED,
9281 const_tree fntype ATTRIBUTE_UNUSED,
9282 int for_return)
9283 {
9284 if (for_return == 0)
9285 return mode;
9286 return promote_mode (type, mode, punsignedp);
9287 }
9288
9289 /* On the HP-PA the value is found in register(s) 28(-29), unless
9290 the mode is SF or DF. Then the value is returned in fr4 (32).
9291
9292 This must perform the same promotions as PROMOTE_MODE, else promoting
9293 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9294
9295 Small structures must be returned in a PARALLEL on PA64 in order
9296 to match the HP Compiler ABI. */
9297
9298 static rtx
9299 pa_function_value (const_tree valtype,
9300 const_tree func ATTRIBUTE_UNUSED,
9301 bool outgoing ATTRIBUTE_UNUSED)
9302 {
9303 enum machine_mode valmode;
9304
9305 if (AGGREGATE_TYPE_P (valtype)
9306 || TREE_CODE (valtype) == COMPLEX_TYPE
9307 || TREE_CODE (valtype) == VECTOR_TYPE)
9308 {
9309 if (TARGET_64BIT)
9310 {
9311 /* Aggregates with a size less than or equal to 128 bits are
9312 returned in GR 28(-29). They are left justified. The pad
9313 bits are undefined. Larger aggregates are returned in
9314 memory. */
9315 rtx loc[2];
9316 int i, offset = 0;
9317 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9318
9319 for (i = 0; i < ub; i++)
9320 {
9321 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9322 gen_rtx_REG (DImode, 28 + i),
9323 GEN_INT (offset));
9324 offset += 8;
9325 }
9326
9327 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9328 }
9329 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9330 {
9331 /* Aggregates 5 to 8 bytes in size are returned in general
9332 registers r28-r29 in the same manner as other non
9333 floating-point objects. The data is right-justified and
9334 zero-extended to 64 bits. This is opposite to the normal
9335 justification used on big endian targets and requires
9336 special treatment. */
9337 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9338 gen_rtx_REG (DImode, 28), const0_rtx);
9339 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9340 }
9341 }
9342
9343 if ((INTEGRAL_TYPE_P (valtype)
9344 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9345 || POINTER_TYPE_P (valtype))
9346 valmode = word_mode;
9347 else
9348 valmode = TYPE_MODE (valtype);
9349
9350 if (TREE_CODE (valtype) == REAL_TYPE
9351 && !AGGREGATE_TYPE_P (valtype)
9352 && TYPE_MODE (valtype) != TFmode
9353 && !TARGET_SOFT_FLOAT)
9354 return gen_rtx_REG (valmode, 32);
9355
9356 return gen_rtx_REG (valmode, 28);
9357 }
9358
9359 /* Implement the TARGET_LIBCALL_VALUE hook. */
9360
9361 static rtx
9362 pa_libcall_value (enum machine_mode mode,
9363 const_rtx fun ATTRIBUTE_UNUSED)
9364 {
9365 if (! TARGET_SOFT_FLOAT
9366 && (mode == SFmode || mode == DFmode))
9367 return gen_rtx_REG (mode, 32);
9368 else
9369 return gen_rtx_REG (mode, 28);
9370 }
9371
9372 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9373
9374 static bool
9375 pa_function_value_regno_p (const unsigned int regno)
9376 {
9377 if (regno == 28
9378 || (! TARGET_SOFT_FLOAT && regno == 32))
9379 return true;
9380
9381 return false;
9382 }
9383
9384 /* Return the location of a parameter that is passed in a register or NULL
9385 if the parameter has any component that is passed in memory.
9386
9387 This is new code and will be pushed to into the net sources after
9388 further testing.
9389
9390 ??? We might want to restructure this so that it looks more like other
9391 ports. */
9392 rtx
9393 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
9394 int named ATTRIBUTE_UNUSED)
9395 {
9396 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9397 int alignment = 0;
9398 int arg_size;
9399 int fpr_reg_base;
9400 int gpr_reg_base;
9401 rtx retval;
9402
9403 if (mode == VOIDmode)
9404 return NULL_RTX;
9405
9406 arg_size = FUNCTION_ARG_SIZE (mode, type);
9407
9408 /* If this arg would be passed partially or totally on the stack, then
9409 this routine should return zero. pa_arg_partial_bytes will
9410 handle arguments which are split between regs and stack slots if
9411 the ABI mandates split arguments. */
9412 if (!TARGET_64BIT)
9413 {
9414 /* The 32-bit ABI does not split arguments. */
9415 if (cum->words + arg_size > max_arg_words)
9416 return NULL_RTX;
9417 }
9418 else
9419 {
9420 if (arg_size > 1)
9421 alignment = cum->words & 1;
9422 if (cum->words + alignment >= max_arg_words)
9423 return NULL_RTX;
9424 }
9425
9426 /* The 32bit ABIs and the 64bit ABIs are rather different,
9427 particularly in their handling of FP registers. We might
9428 be able to cleverly share code between them, but I'm not
9429 going to bother in the hope that splitting them up results
9430 in code that is more easily understood. */
9431
9432 if (TARGET_64BIT)
9433 {
9434 /* Advance the base registers to their current locations.
9435
9436 Remember, gprs grow towards smaller register numbers while
9437 fprs grow to higher register numbers. Also remember that
9438 although FP regs are 32-bit addressable, we pretend that
9439 the registers are 64-bits wide. */
9440 gpr_reg_base = 26 - cum->words;
9441 fpr_reg_base = 32 + cum->words;
9442
9443 /* Arguments wider than one word and small aggregates need special
9444 treatment. */
9445 if (arg_size > 1
9446 || mode == BLKmode
9447 || (type && (AGGREGATE_TYPE_P (type)
9448 || TREE_CODE (type) == COMPLEX_TYPE
9449 || TREE_CODE (type) == VECTOR_TYPE)))
9450 {
9451 /* Double-extended precision (80-bit), quad-precision (128-bit)
9452 and aggregates including complex numbers are aligned on
9453 128-bit boundaries. The first eight 64-bit argument slots
9454 are associated one-to-one, with general registers r26
9455 through r19, and also with floating-point registers fr4
9456 through fr11. Arguments larger than one word are always
9457 passed in general registers.
9458
9459 Using a PARALLEL with a word mode register results in left
9460 justified data on a big-endian target. */
9461
9462 rtx loc[8];
9463 int i, offset = 0, ub = arg_size;
9464
9465 /* Align the base register. */
9466 gpr_reg_base -= alignment;
9467
9468 ub = MIN (ub, max_arg_words - cum->words - alignment);
9469 for (i = 0; i < ub; i++)
9470 {
9471 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9472 gen_rtx_REG (DImode, gpr_reg_base),
9473 GEN_INT (offset));
9474 gpr_reg_base -= 1;
9475 offset += 8;
9476 }
9477
9478 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9479 }
9480 }
9481 else
9482 {
9483 /* If the argument is larger than a word, then we know precisely
9484 which registers we must use. */
9485 if (arg_size > 1)
9486 {
9487 if (cum->words)
9488 {
9489 gpr_reg_base = 23;
9490 fpr_reg_base = 38;
9491 }
9492 else
9493 {
9494 gpr_reg_base = 25;
9495 fpr_reg_base = 34;
9496 }
9497
9498 /* Structures 5 to 8 bytes in size are passed in the general
9499 registers in the same manner as other non floating-point
9500 objects. The data is right-justified and zero-extended
9501 to 64 bits. This is opposite to the normal justification
9502 used on big endian targets and requires special treatment.
9503 We now define BLOCK_REG_PADDING to pad these objects.
9504 Aggregates, complex and vector types are passed in the same
9505 manner as structures. */
9506 if (mode == BLKmode
9507 || (type && (AGGREGATE_TYPE_P (type)
9508 || TREE_CODE (type) == COMPLEX_TYPE
9509 || TREE_CODE (type) == VECTOR_TYPE)))
9510 {
9511 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9512 gen_rtx_REG (DImode, gpr_reg_base),
9513 const0_rtx);
9514 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9515 }
9516 }
9517 else
9518 {
9519 /* We have a single word (32 bits). A simple computation
9520 will get us the register #s we need. */
9521 gpr_reg_base = 26 - cum->words;
9522 fpr_reg_base = 32 + 2 * cum->words;
9523 }
9524 }
9525
9526 /* Determine if the argument needs to be passed in both general and
9527 floating point registers. */
9528 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9529 /* If we are doing soft-float with portable runtime, then there
9530 is no need to worry about FP regs. */
9531 && !TARGET_SOFT_FLOAT
9532 /* The parameter must be some kind of scalar float, else we just
9533 pass it in integer registers. */
9534 && GET_MODE_CLASS (mode) == MODE_FLOAT
9535 /* The target function must not have a prototype. */
9536 && cum->nargs_prototype <= 0
9537 /* libcalls do not need to pass items in both FP and general
9538 registers. */
9539 && type != NULL_TREE
9540 /* All this hair applies to "outgoing" args only. This includes
9541 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9542 && !cum->incoming)
9543 /* Also pass outgoing floating arguments in both registers in indirect
9544 calls with the 32 bit ABI and the HP assembler since there is no
9545 way to the specify argument locations in static functions. */
9546 || (!TARGET_64BIT
9547 && !TARGET_GAS
9548 && !cum->incoming
9549 && cum->indirect
9550 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9551 {
9552 retval
9553 = gen_rtx_PARALLEL
9554 (mode,
9555 gen_rtvec (2,
9556 gen_rtx_EXPR_LIST (VOIDmode,
9557 gen_rtx_REG (mode, fpr_reg_base),
9558 const0_rtx),
9559 gen_rtx_EXPR_LIST (VOIDmode,
9560 gen_rtx_REG (mode, gpr_reg_base),
9561 const0_rtx)));
9562 }
9563 else
9564 {
9565 /* See if we should pass this parameter in a general register. */
9566 if (TARGET_SOFT_FLOAT
9567 /* Indirect calls in the normal 32bit ABI require all arguments
9568 to be passed in general registers. */
9569 || (!TARGET_PORTABLE_RUNTIME
9570 && !TARGET_64BIT
9571 && !TARGET_ELF32
9572 && cum->indirect)
9573 /* If the parameter is not a scalar floating-point parameter,
9574 then it belongs in GPRs. */
9575 || GET_MODE_CLASS (mode) != MODE_FLOAT
9576 /* Structure with single SFmode field belongs in GPR. */
9577 || (type && AGGREGATE_TYPE_P (type)))
9578 retval = gen_rtx_REG (mode, gpr_reg_base);
9579 else
9580 retval = gen_rtx_REG (mode, fpr_reg_base);
9581 }
9582 return retval;
9583 }
9584
9585
9586 /* If this arg would be passed totally in registers or totally on the stack,
9587 then this routine should return zero. */
9588
9589 static int
9590 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9591 tree type, bool named ATTRIBUTE_UNUSED)
9592 {
9593 unsigned int max_arg_words = 8;
9594 unsigned int offset = 0;
9595
9596 if (!TARGET_64BIT)
9597 return 0;
9598
9599 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9600 offset = 1;
9601
9602 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9603 /* Arg fits fully into registers. */
9604 return 0;
9605 else if (cum->words + offset >= max_arg_words)
9606 /* Arg fully on the stack. */
9607 return 0;
9608 else
9609 /* Arg is split. */
9610 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9611 }
9612
9613
9614 /* A get_unnamed_section callback for switching to the text section.
9615
9616 This function is only used with SOM. Because we don't support
9617 named subspaces, we can only create a new subspace or switch back
9618 to the default text subspace. */
9619
9620 static void
9621 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9622 {
9623 gcc_assert (TARGET_SOM);
9624 if (TARGET_GAS)
9625 {
9626 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9627 {
9628 /* We only want to emit a .nsubspa directive once at the
9629 start of the function. */
9630 cfun->machine->in_nsubspa = 1;
9631
9632 /* Create a new subspace for the text. This provides
9633 better stub placement and one-only functions. */
9634 if (cfun->decl
9635 && DECL_ONE_ONLY (cfun->decl)
9636 && !DECL_WEAK (cfun->decl))
9637 {
9638 output_section_asm_op ("\t.SPACE $TEXT$\n"
9639 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9640 "ACCESS=44,SORT=24,COMDAT");
9641 return;
9642 }
9643 }
9644 else
9645 {
9646 /* There isn't a current function or the body of the current
9647 function has been completed. So, we are changing to the
9648 text section to output debugging information. Thus, we
9649 need to forget that we are in the text section so that
9650 varasm.c will call us when text_section is selected again. */
9651 gcc_assert (!cfun || !cfun->machine
9652 || cfun->machine->in_nsubspa == 2);
9653 in_section = NULL;
9654 }
9655 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9656 return;
9657 }
9658 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9659 }
9660
9661 /* A get_unnamed_section callback for switching to comdat data
9662 sections. This function is only used with SOM. */
9663
9664 static void
9665 som_output_comdat_data_section_asm_op (const void *data)
9666 {
9667 in_section = NULL;
9668 output_section_asm_op (data);
9669 }
9670
9671 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9672
9673 static void
9674 pa_som_asm_init_sections (void)
9675 {
9676 text_section
9677 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9678
9679 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9680 is not being generated. */
9681 som_readonly_data_section
9682 = get_unnamed_section (0, output_section_asm_op,
9683 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9684
9685 /* When secondary definitions are not supported, SOM makes readonly
9686 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9687 the comdat flag. */
9688 som_one_only_readonly_data_section
9689 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9690 "\t.SPACE $TEXT$\n"
9691 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9692 "ACCESS=0x2c,SORT=16,COMDAT");
9693
9694
9695 /* When secondary definitions are not supported, SOM makes data one-only
9696 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9697 som_one_only_data_section
9698 = get_unnamed_section (SECTION_WRITE,
9699 som_output_comdat_data_section_asm_op,
9700 "\t.SPACE $PRIVATE$\n"
9701 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9702 "ACCESS=31,SORT=24,COMDAT");
9703
9704 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9705 which reference data within the $TEXT$ space (for example constant
9706 strings in the $LIT$ subspace).
9707
9708 The assemblers (GAS and HP as) both have problems with handling
9709 the difference of two symbols which is the other correct way to
9710 reference constant data during PIC code generation.
9711
9712 So, there's no way to reference constant data which is in the
9713 $TEXT$ space during PIC generation. Instead place all constant
9714 data into the $PRIVATE$ subspace (this reduces sharing, but it
9715 works correctly). */
9716 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9717
9718 /* We must not have a reference to an external symbol defined in a
9719 shared library in a readonly section, else the SOM linker will
9720 complain.
9721
9722 So, we force exception information into the data section. */
9723 exception_section = data_section;
9724 }
9725
9726 /* On hpux10, the linker will give an error if we have a reference
9727 in the read-only data section to a symbol defined in a shared
9728 library. Therefore, expressions that might require a reloc can
9729 not be placed in the read-only data section. */
9730
9731 static section *
9732 pa_select_section (tree exp, int reloc,
9733 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9734 {
9735 if (TREE_CODE (exp) == VAR_DECL
9736 && TREE_READONLY (exp)
9737 && !TREE_THIS_VOLATILE (exp)
9738 && DECL_INITIAL (exp)
9739 && (DECL_INITIAL (exp) == error_mark_node
9740 || TREE_CONSTANT (DECL_INITIAL (exp)))
9741 && !reloc)
9742 {
9743 if (TARGET_SOM
9744 && DECL_ONE_ONLY (exp)
9745 && !DECL_WEAK (exp))
9746 return som_one_only_readonly_data_section;
9747 else
9748 return readonly_data_section;
9749 }
9750 else if (CONSTANT_CLASS_P (exp) && !reloc)
9751 return readonly_data_section;
9752 else if (TARGET_SOM
9753 && TREE_CODE (exp) == VAR_DECL
9754 && DECL_ONE_ONLY (exp)
9755 && !DECL_WEAK (exp))
9756 return som_one_only_data_section;
9757 else
9758 return data_section;
9759 }
9760
9761 static void
9762 pa_globalize_label (FILE *stream, const char *name)
9763 {
9764 /* We only handle DATA objects here, functions are globalized in
9765 ASM_DECLARE_FUNCTION_NAME. */
9766 if (! FUNCTION_NAME_P (name))
9767 {
9768 fputs ("\t.EXPORT ", stream);
9769 assemble_name (stream, name);
9770 fputs (",DATA\n", stream);
9771 }
9772 }
9773
9774 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9775
9776 static rtx
9777 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9778 int incoming ATTRIBUTE_UNUSED)
9779 {
9780 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9781 }
9782
9783 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9784
9785 bool
9786 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9787 {
9788 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9789 PA64 ABI says that objects larger than 128 bits are returned in memory.
9790 Note, int_size_in_bytes can return -1 if the size of the object is
9791 variable or larger than the maximum value that can be expressed as
9792 a HOST_WIDE_INT. It can also return zero for an empty type. The
9793 simplest way to handle variable and empty types is to pass them in
9794 memory. This avoids problems in defining the boundaries of argument
9795 slots, allocating registers, etc. */
9796 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9797 || int_size_in_bytes (type) <= 0);
9798 }
9799
9800 /* Structure to hold declaration and name of external symbols that are
9801 emitted by GCC. We generate a vector of these symbols and output them
9802 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9803 This avoids putting out names that are never really used. */
9804
9805 typedef struct GTY(()) extern_symbol
9806 {
9807 tree decl;
9808 const char *name;
9809 } extern_symbol;
9810
9811 /* Define gc'd vector type for extern_symbol. */
9812 DEF_VEC_O(extern_symbol);
9813 DEF_VEC_ALLOC_O(extern_symbol,gc);
9814
9815 /* Vector of extern_symbol pointers. */
9816 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9817
9818 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9819 /* Mark DECL (name NAME) as an external reference (assembler output
9820 file FILE). This saves the names to output at the end of the file
9821 if actually referenced. */
9822
9823 void
9824 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9825 {
9826 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9827
9828 gcc_assert (file == asm_out_file);
9829 p->decl = decl;
9830 p->name = name;
9831 }
9832
9833 /* Output text required at the end of an assembler file.
9834 This includes deferred plabels and .import directives for
9835 all external symbols that were actually referenced. */
9836
9837 static void
9838 pa_hpux_file_end (void)
9839 {
9840 unsigned int i;
9841 extern_symbol *p;
9842
9843 if (!NO_DEFERRED_PROFILE_COUNTERS)
9844 output_deferred_profile_counters ();
9845
9846 output_deferred_plabels ();
9847
9848 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9849 {
9850 tree decl = p->decl;
9851
9852 if (!TREE_ASM_WRITTEN (decl)
9853 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9854 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9855 }
9856
9857 VEC_free (extern_symbol, gc, extern_symbols);
9858 }
9859 #endif
9860
9861 /* Return true if a change from mode FROM to mode TO for a register
9862 in register class RCLASS is invalid. */
9863
9864 bool
9865 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9866 enum reg_class rclass)
9867 {
9868 if (from == to)
9869 return false;
9870
9871 /* Reject changes to/from complex and vector modes. */
9872 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9873 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9874 return true;
9875
9876 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9877 return false;
9878
9879 /* There is no way to load QImode or HImode values directly from
9880 memory. SImode loads to the FP registers are not zero extended.
9881 On the 64-bit target, this conflicts with the definition of
9882 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9883 with different sizes in the floating-point registers. */
9884 if (MAYBE_FP_REG_CLASS_P (rclass))
9885 return true;
9886
9887 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9888 in specific sets of registers. Thus, we cannot allow changing
9889 to a larger mode when it's larger than a word. */
9890 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9891 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9892 return true;
9893
9894 return false;
9895 }
9896
9897 /* Returns TRUE if it is a good idea to tie two pseudo registers
9898 when one has mode MODE1 and one has mode MODE2.
9899 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9900 for any hard reg, then this must be FALSE for correct output.
9901
9902 We should return FALSE for QImode and HImode because these modes
9903 are not ok in the floating-point registers. However, this prevents
9904 tieing these modes to SImode and DImode in the general registers.
9905 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9906 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9907 in the floating-point registers. */
9908
9909 bool
9910 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9911 {
9912 /* Don't tie modes in different classes. */
9913 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9914 return false;
9915
9916 return true;
9917 }
9918
9919 \f
9920 /* Length in units of the trampoline instruction code. */
9921
9922 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9923
9924
9925 /* Output assembler code for a block containing the constant parts
9926 of a trampoline, leaving space for the variable parts.\
9927
9928 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9929 and then branches to the specified routine.
9930
9931 This code template is copied from text segment to stack location
9932 and then patched with pa_trampoline_init to contain valid values,
9933 and then entered as a subroutine.
9934
9935 It is best to keep this as small as possible to avoid having to
9936 flush multiple lines in the cache. */
9937
9938 static void
9939 pa_asm_trampoline_template (FILE *f)
9940 {
9941 if (!TARGET_64BIT)
9942 {
9943 fputs ("\tldw 36(%r22),%r21\n", f);
9944 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
9945 if (ASSEMBLER_DIALECT == 0)
9946 fputs ("\tdepi 0,31,2,%r21\n", f);
9947 else
9948 fputs ("\tdepwi 0,31,2,%r21\n", f);
9949 fputs ("\tldw 4(%r21),%r19\n", f);
9950 fputs ("\tldw 0(%r21),%r21\n", f);
9951 if (TARGET_PA_20)
9952 {
9953 fputs ("\tbve (%r21)\n", f);
9954 fputs ("\tldw 40(%r22),%r29\n", f);
9955 fputs ("\t.word 0\n", f);
9956 fputs ("\t.word 0\n", f);
9957 }
9958 else
9959 {
9960 fputs ("\tldsid (%r21),%r1\n", f);
9961 fputs ("\tmtsp %r1,%sr0\n", f);
9962 fputs ("\tbe 0(%sr0,%r21)\n", f);
9963 fputs ("\tldw 40(%r22),%r29\n", f);
9964 }
9965 fputs ("\t.word 0\n", f);
9966 fputs ("\t.word 0\n", f);
9967 fputs ("\t.word 0\n", f);
9968 fputs ("\t.word 0\n", f);
9969 }
9970 else
9971 {
9972 fputs ("\t.dword 0\n", f);
9973 fputs ("\t.dword 0\n", f);
9974 fputs ("\t.dword 0\n", f);
9975 fputs ("\t.dword 0\n", f);
9976 fputs ("\tmfia %r31\n", f);
9977 fputs ("\tldd 24(%r31),%r1\n", f);
9978 fputs ("\tldd 24(%r1),%r27\n", f);
9979 fputs ("\tldd 16(%r1),%r1\n", f);
9980 fputs ("\tbve (%r1)\n", f);
9981 fputs ("\tldd 32(%r31),%r31\n", f);
9982 fputs ("\t.dword 0 ; fptr\n", f);
9983 fputs ("\t.dword 0 ; static link\n", f);
9984 }
9985 }
9986
9987 /* Emit RTL insns to initialize the variable parts of a trampoline.
9988 FNADDR is an RTX for the address of the function's pure code.
9989 CXT is an RTX for the static chain value for the function.
9990
9991 Move the function address to the trampoline template at offset 36.
9992 Move the static chain value to trampoline template at offset 40.
9993 Move the trampoline address to trampoline template at offset 44.
9994 Move r19 to trampoline template at offset 48. The latter two
9995 words create a plabel for the indirect call to the trampoline.
9996
9997 A similar sequence is used for the 64-bit port but the plabel is
9998 at the beginning of the trampoline.
9999
10000 Finally, the cache entries for the trampoline code are flushed.
10001 This is necessary to ensure that the trampoline instruction sequence
10002 is written to memory prior to any attempts at prefetching the code
10003 sequence. */
10004
10005 static void
10006 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10007 {
10008 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10009 rtx start_addr = gen_reg_rtx (Pmode);
10010 rtx end_addr = gen_reg_rtx (Pmode);
10011 rtx line_length = gen_reg_rtx (Pmode);
10012 rtx r_tramp, tmp;
10013
10014 emit_block_move (m_tramp, assemble_trampoline_template (),
10015 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10016 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10017
10018 if (!TARGET_64BIT)
10019 {
10020 tmp = adjust_address (m_tramp, Pmode, 36);
10021 emit_move_insn (tmp, fnaddr);
10022 tmp = adjust_address (m_tramp, Pmode, 40);
10023 emit_move_insn (tmp, chain_value);
10024
10025 /* Create a fat pointer for the trampoline. */
10026 tmp = adjust_address (m_tramp, Pmode, 44);
10027 emit_move_insn (tmp, r_tramp);
10028 tmp = adjust_address (m_tramp, Pmode, 48);
10029 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10030
10031 /* fdc and fic only use registers for the address to flush,
10032 they do not accept integer displacements. We align the
10033 start and end addresses to the beginning of their respective
10034 cache lines to minimize the number of lines flushed. */
10035 emit_insn (gen_andsi3 (start_addr, r_tramp,
10036 GEN_INT (-MIN_CACHELINE_SIZE)));
10037 tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE-1));
10038 emit_insn (gen_andsi3 (end_addr, tmp,
10039 GEN_INT (-MIN_CACHELINE_SIZE)));
10040 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10041 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10042 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10043 gen_reg_rtx (Pmode),
10044 gen_reg_rtx (Pmode)));
10045 }
10046 else
10047 {
10048 tmp = adjust_address (m_tramp, Pmode, 56);
10049 emit_move_insn (tmp, fnaddr);
10050 tmp = adjust_address (m_tramp, Pmode, 64);
10051 emit_move_insn (tmp, chain_value);
10052
10053 /* Create a fat pointer for the trampoline. */
10054 tmp = adjust_address (m_tramp, Pmode, 16);
10055 emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
10056 tmp = adjust_address (m_tramp, Pmode, 24);
10057 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10058
10059 /* fdc and fic only use registers for the address to flush,
10060 they do not accept integer displacements. We align the
10061 start and end addresses to the beginning of their respective
10062 cache lines to minimize the number of lines flushed. */
10063 tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
10064 emit_insn (gen_anddi3 (start_addr, tmp,
10065 GEN_INT (-MIN_CACHELINE_SIZE)));
10066 tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
10067 emit_insn (gen_anddi3 (end_addr, tmp,
10068 GEN_INT (-MIN_CACHELINE_SIZE)));
10069 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10070 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10071 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10072 gen_reg_rtx (Pmode),
10073 gen_reg_rtx (Pmode)));
10074 }
10075 }
10076
10077 /* Perform any machine-specific adjustment in the address of the trampoline.
10078 ADDR contains the address that was passed to pa_trampoline_init.
10079 Adjust the trampoline address to point to the plabel at offset 44. */
10080
10081 static rtx
10082 pa_trampoline_adjust_address (rtx addr)
10083 {
10084 if (!TARGET_64BIT)
10085 addr = memory_address (Pmode, plus_constant (addr, 46));
10086 return addr;
10087 }
10088
10089 static rtx
10090 pa_delegitimize_address (rtx orig_x)
10091 {
10092 rtx x = delegitimize_mem_from_attrs (orig_x);
10093
10094 if (GET_CODE (x) == LO_SUM
10095 && GET_CODE (XEXP (x, 1)) == UNSPEC
10096 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10097 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10098 return x;
10099 }
10100 \f
10101 #include "gt-pa.h"