1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "tree.h"
36 #include "output.h"
37 #include "except.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "reload.h"
41 #include "integrate.h"
42 #include "function.h"
43 #include "toplev.h"
44 #include "ggc.h"
45 #include "recog.h"
46 #include "predict.h"
47 #include "tm_p.h"
48 #include "target.h"
49 #include "target-def.h"
50
51 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
52 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook_int_void_1
53
54 /* Return nonzero if there is a bypass for the output of
55 OUT_INSN and the fp store IN_INSN. */
56 int
57 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
58 {
59 enum machine_mode store_mode;
60 enum machine_mode other_mode;
61 rtx set;
62
63 if (recog_memoized (in_insn) < 0
64 || get_attr_type (in_insn) != TYPE_FPSTORE
65 || recog_memoized (out_insn) < 0)
66 return 0;
67
68 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
69
70 set = single_set (out_insn);
71 if (!set)
72 return 0;
73
74 other_mode = GET_MODE (SET_SRC (set));
75
76 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
77 }
78
79
80 #ifndef DO_FRAME_NOTES
81 #ifdef INCOMING_RETURN_ADDR_RTX
82 #define DO_FRAME_NOTES 1
83 #else
84 #define DO_FRAME_NOTES 0
85 #endif
86 #endif
87
88 static void copy_reg_pointer (rtx, rtx);
89 static int hppa_address_cost (rtx);
90 static bool hppa_rtx_costs (rtx, int, int, int *);
91 static inline rtx force_mode (enum machine_mode, rtx);
92 static void pa_reorg (void);
93 static void pa_combine_instructions (void);
94 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
95 static int forward_branch_p (rtx);
96 static int shadd_constant_p (int);
97 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
98 static int compute_movmem_length (rtx);
99 static int compute_clrmem_length (rtx);
100 static bool pa_assemble_integer (rtx, unsigned int, int);
101 static void remove_useless_addtr_insns (int);
102 static void store_reg (int, HOST_WIDE_INT, int);
103 static void store_reg_modify (int, int, HOST_WIDE_INT);
104 static void load_reg (int, HOST_WIDE_INT, int);
105 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
106 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
107 static void update_total_code_bytes (int);
108 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
109 static int pa_adjust_cost (rtx, rtx, rtx, int);
110 static int pa_adjust_priority (rtx, int);
111 static int pa_issue_rate (void);
112 static void pa_select_section (tree, int, unsigned HOST_WIDE_INT)
113 ATTRIBUTE_UNUSED;
114 static void pa_encode_section_info (tree, rtx, int);
115 static const char *pa_strip_name_encoding (const char *);
116 static bool pa_function_ok_for_sibcall (tree, tree);
117 static void pa_globalize_label (FILE *, const char *)
118 ATTRIBUTE_UNUSED;
119 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
120 HOST_WIDE_INT, tree);
121 #if !defined(USE_COLLECT2)
122 static void pa_asm_out_constructor (rtx, int);
123 static void pa_asm_out_destructor (rtx, int);
124 #endif
125 static void pa_init_builtins (void);
126 static rtx hppa_builtin_saveregs (void);
127 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
128 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
129 static struct deferred_plabel *get_plabel (const char *)
130 ATTRIBUTE_UNUSED;
131 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
132 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
133 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
134 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
135 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
136 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
137 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
138 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
139 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
140 static void output_deferred_plabels (void);
141 #ifdef HPUX_LONG_DOUBLE_LIBRARY
142 static void pa_hpux_init_libfuncs (void);
143 #endif
144 static rtx pa_struct_value_rtx (tree, int);
145
146 /* Save the operands last given to a compare for use when we
147 generate a scc or bcc insn. */
148 rtx hppa_compare_op0, hppa_compare_op1;
149 enum cmp_type hppa_branch_type;
150
151 /* Which cpu we are scheduling for. */
152 enum processor_type pa_cpu;
153
154 /* String to hold which cpu we are scheduling for. */
155 const char *pa_cpu_string;
156
157 /* Which architecture we are generating code for. */
158 enum architecture_type pa_arch;
159
160 /* String to hold which architecture we are generating code for. */
161 const char *pa_arch_string;
162
163 /* Counts for the number of callee-saved general and floating point
164 registers which were saved by the current function's prologue. */
165 static int gr_saved, fr_saved;
166
167 static rtx find_addr_reg (rtx);
168
169 /* Keep track of the number of bytes we have output in the CODE subspace
170 during this compilation so we'll know when to emit inline long-calls. */
171 unsigned long total_code_bytes;
172
173 /* The last address of the previous function plus the number of bytes in
174 associated thunks that have been output. This is used to determine if
175 a thunk can use an IA-relative branch to reach its target function. */
176 static int last_address;
177
178 /* Variables to handle plabels that we discover are necessary at assembly
179 output time. They are output after the current function. */
180 struct deferred_plabel GTY(())
181 {
182 rtx internal_label;
183 const char *name;
184 };
185 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
186 deferred_plabels;
187 static size_t n_deferred_plabels = 0;
188
189 \f
190 /* Initialize the GCC target structure. */
191
192 #undef TARGET_ASM_ALIGNED_HI_OP
193 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
194 #undef TARGET_ASM_ALIGNED_SI_OP
195 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
196 #undef TARGET_ASM_ALIGNED_DI_OP
197 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
198 #undef TARGET_ASM_UNALIGNED_HI_OP
199 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
200 #undef TARGET_ASM_UNALIGNED_SI_OP
201 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
202 #undef TARGET_ASM_UNALIGNED_DI_OP
203 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
204 #undef TARGET_ASM_INTEGER
205 #define TARGET_ASM_INTEGER pa_assemble_integer
206
207 #undef TARGET_ASM_FUNCTION_PROLOGUE
208 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
209 #undef TARGET_ASM_FUNCTION_EPILOGUE
210 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
211
212 #undef TARGET_SCHED_ADJUST_COST
213 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
214 #undef TARGET_SCHED_ADJUST_PRIORITY
215 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
216 #undef TARGET_SCHED_ISSUE_RATE
217 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
218
219 #undef TARGET_ENCODE_SECTION_INFO
220 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
221 #undef TARGET_STRIP_NAME_ENCODING
222 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
223
224 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
225 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
226
227 #undef TARGET_ASM_OUTPUT_MI_THUNK
228 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
229 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
230 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
231
232 #undef TARGET_ASM_FILE_END
233 #define TARGET_ASM_FILE_END output_deferred_plabels
234
235 #if !defined(USE_COLLECT2)
236 #undef TARGET_ASM_CONSTRUCTOR
237 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
238 #undef TARGET_ASM_DESTRUCTOR
239 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
240 #endif
241
242 #undef TARGET_INIT_BUILTINS
243 #define TARGET_INIT_BUILTINS pa_init_builtins
244
245 #undef TARGET_RTX_COSTS
246 #define TARGET_RTX_COSTS hppa_rtx_costs
247 #undef TARGET_ADDRESS_COST
248 #define TARGET_ADDRESS_COST hppa_address_cost
249
250 #undef TARGET_MACHINE_DEPENDENT_REORG
251 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
252
253 #ifdef HPUX_LONG_DOUBLE_LIBRARY
254 #undef TARGET_INIT_LIBFUNCS
255 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
256 #endif
257
258 #undef TARGET_PROMOTE_FUNCTION_RETURN
259 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
260 #undef TARGET_PROMOTE_PROTOTYPES
261 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
262
263 #undef TARGET_STRUCT_VALUE_RTX
264 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
265 #undef TARGET_RETURN_IN_MEMORY
266 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
267
268 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
269 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
270
271 struct gcc_target targetm = TARGET_INITIALIZER;
272 \f
273 void
274 override_options (void)
275 {
276 if (pa_cpu_string == NULL)
277 pa_cpu_string = TARGET_SCHED_DEFAULT;
278
279 if (! strcmp (pa_cpu_string, "8000"))
280 {
281 pa_cpu_string = "8000";
282 pa_cpu = PROCESSOR_8000;
283 }
284 else if (! strcmp (pa_cpu_string, "7100"))
285 {
286 pa_cpu_string = "7100";
287 pa_cpu = PROCESSOR_7100;
288 }
289 else if (! strcmp (pa_cpu_string, "700"))
290 {
291 pa_cpu_string = "700";
292 pa_cpu = PROCESSOR_700;
293 }
294 else if (! strcmp (pa_cpu_string, "7100LC"))
295 {
296 pa_cpu_string = "7100LC";
297 pa_cpu = PROCESSOR_7100LC;
298 }
299 else if (! strcmp (pa_cpu_string, "7200"))
300 {
301 pa_cpu_string = "7200";
302 pa_cpu = PROCESSOR_7200;
303 }
304 else if (! strcmp (pa_cpu_string, "7300"))
305 {
306 pa_cpu_string = "7300";
307 pa_cpu = PROCESSOR_7300;
308 }
309 else
310 {
311 warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
312 }
313
314 /* Set the instruction set architecture. */
315 if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
316 {
317 pa_arch_string = "1.0";
318 pa_arch = ARCHITECTURE_10;
319 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
320 }
321 else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
322 {
323 pa_arch_string = "1.1";
324 pa_arch = ARCHITECTURE_11;
325 target_flags &= ~MASK_PA_20;
326 target_flags |= MASK_PA_11;
327 }
328 else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
329 {
330 pa_arch_string = "2.0";
331 pa_arch = ARCHITECTURE_20;
332 target_flags |= MASK_PA_11 | MASK_PA_20;
333 }
334 else if (pa_arch_string)
335 {
336 warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
337 }
338
339 /* Unconditional branches in the delay slot are not compatible with dwarf2
340 call frame information. There is no benefit in using this optimization
341 on PA8000 and later processors. */
342 if (pa_cpu >= PROCESSOR_8000
343 || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
344 || flag_unwind_tables)
345 target_flags &= ~MASK_JUMP_IN_DELAY;
346
347 if (flag_pic && TARGET_PORTABLE_RUNTIME)
348 {
349 warning ("PIC code generation is not supported in the portable runtime model\n");
350 }
351
352 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
353 {
354 warning ("PIC code generation is not compatible with fast indirect calls\n");
355 }
356
357 if (! TARGET_GAS && write_symbols != NO_DEBUG)
358 {
359 warning ("-g is only supported when using GAS on this processor,");
360 warning ("-g option disabled");
361 write_symbols = NO_DEBUG;
362 }
363
364 /* We only support the "big PIC" model now. And we always generate PIC
365 code when in 64bit mode. */
366 if (flag_pic == 1 || TARGET_64BIT)
367 flag_pic = 2;
368
369 /* We can't guarantee that .dword is available for 32-bit targets. */
370 if (UNITS_PER_WORD == 4)
371 targetm.asm_out.aligned_op.di = NULL;
372
373 /* The unaligned ops are only available when using GAS. */
374 if (!TARGET_GAS)
375 {
376 targetm.asm_out.unaligned_op.hi = NULL;
377 targetm.asm_out.unaligned_op.si = NULL;
378 targetm.asm_out.unaligned_op.di = NULL;
379 }
380 }
381
382 static void
383 pa_init_builtins (void)
384 {
385 #ifdef DONT_HAVE_FPUTC_UNLOCKED
386 built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
387 implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
388 #endif
389 }
390
391 /* If FROM is a probable pointer register, mark TO as a probable
392 pointer register with the same pointer alignment as FROM. */
393
394 static void
395 copy_reg_pointer (rtx to, rtx from)
396 {
397 if (REG_POINTER (from))
398 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
399 }
400
401 /* Return nonzero only if OP is a register of mode MODE,
402 or CONST0_RTX. */
403 int
404 reg_or_0_operand (rtx op, enum machine_mode mode)
405 {
406 return (op == CONST0_RTX (mode) || register_operand (op, mode));
407 }
408
409 /* Return nonzero if OP is suitable for use in a call to a named
410 function.
411
 412    For 2.5, try to eliminate either call_operand_address or
 413    function_label_operand; they perform very similar functions.  */
414 int
415 call_operand_address (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
416 {
417 return (GET_MODE (op) == word_mode
418 && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
419 }
420
421 /* Return 1 if X contains a symbolic expression. We know these
422 expressions will have one of a few well defined forms, so
423 we need only check those forms. */
424 int
425 symbolic_expression_p (rtx x)
426 {
427
428 /* Strip off any HIGH. */
429 if (GET_CODE (x) == HIGH)
430 x = XEXP (x, 0);
431
432 return (symbolic_operand (x, VOIDmode));
433 }
434
435 int
436 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
437 {
438 switch (GET_CODE (op))
439 {
440 case SYMBOL_REF:
441 case LABEL_REF:
442 return 1;
443 case CONST:
444 op = XEXP (op, 0);
445 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
446 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
447 && GET_CODE (XEXP (op, 1)) == CONST_INT);
448 default:
449 return 0;
450 }
451 }
452
453 /* Return truth value of statement that OP is a symbolic memory
454 operand of mode MODE. */
455
456 int
457 symbolic_memory_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
458 {
459 if (GET_CODE (op) == SUBREG)
460 op = SUBREG_REG (op);
461 if (GET_CODE (op) != MEM)
462 return 0;
463 op = XEXP (op, 0);
464 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
465 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
466 }
467
468 /* Return 1 if the operand is either a register, zero, or a memory operand
469 that is not symbolic. */
470
471 int
472 reg_or_0_or_nonsymb_mem_operand (rtx op, enum machine_mode mode)
473 {
474 if (register_operand (op, mode))
475 return 1;
476
477 if (op == CONST0_RTX (mode))
478 return 1;
479
480 if (GET_CODE (op) == SUBREG)
481 op = SUBREG_REG (op);
482
483 if (GET_CODE (op) != MEM)
484 return 0;
485
486 /* Until problems with management of the REG_POINTER flag are resolved,
487 we need to delay creating move insns with unscaled indexed addresses
488 until CSE is not expected. */
489 if (!TARGET_NO_SPACE_REGS
490 && !cse_not_expected
491 && GET_CODE (XEXP (op, 0)) == PLUS
492 && REG_P (XEXP (XEXP (op, 0), 0))
493 && REG_P (XEXP (XEXP (op, 0), 1)))
494 return 0;
495
496 return (!symbolic_memory_operand (op, mode)
497 && memory_address_p (mode, XEXP (op, 0)));
498 }
499
500 /* Return 1 if the operand is a register operand or a non-symbolic memory
501 operand after reload. This predicate is used for branch patterns that
502 internally handle register reloading. We need to accept non-symbolic
503 memory operands after reload to ensure that the pattern is still valid
504 if reload didn't find a hard register for the operand. */
505
506 int
507 reg_before_reload_operand (rtx op, enum machine_mode mode)
508 {
509 /* Don't accept a SUBREG since it will need a reload. */
510 if (GET_CODE (op) == SUBREG)
511 return 0;
512
513 if (register_operand (op, mode))
514 return 1;
515
516 if (reload_completed
517 && memory_operand (op, mode)
518 && !symbolic_memory_operand (op, mode))
519 return 1;
520
521 return 0;
522 }
523
524 /* Accept any constant that can be moved in one instruction into a
525 general register. */
526 int
527 cint_ok_for_move (HOST_WIDE_INT intval)
528 {
 529   /* OK if ldo, ldil, or zdepi can be used.  */
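      /* Here 'J' accepts a 14-bit signed immediate (ldo), 'N' an
         ldil-style constant, and 'K' a zdepi operand; the letters are
         defined by CONST_OK_FOR_LETTER_P in pa.h.  */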
530 return (CONST_OK_FOR_LETTER_P (intval, 'J')
531 || CONST_OK_FOR_LETTER_P (intval, 'N')
532 || CONST_OK_FOR_LETTER_P (intval, 'K'));
533 }
534
535 /* Return 1 iff OP is an indexed memory operand. */
536 int
537 indexed_memory_operand (rtx op, enum machine_mode mode)
538 {
539 if (GET_MODE (op) != mode)
540 return 0;
541
542 /* Before reload, a (SUBREG (MEM...)) forces reloading into a register. */
543 if (reload_completed && GET_CODE (op) == SUBREG)
544 op = SUBREG_REG (op);
545
546 if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode))
547 return 0;
548
549 op = XEXP (op, 0);
550
551 return (memory_address_p (mode, op) && IS_INDEX_ADDR_P (op));
552 }
553
554 /* Accept anything that can be used as a destination operand for a
555 move instruction. We don't accept indexed memory operands since
556 they are supported only for floating point stores. */
557 int
558 move_dest_operand (rtx op, enum machine_mode mode)
559 {
560 if (register_operand (op, mode))
561 return 1;
562
563 if (GET_MODE (op) != mode)
564 return 0;
565
566 if (GET_CODE (op) == SUBREG)
567 op = SUBREG_REG (op);
568
569 if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode))
570 return 0;
571
572 op = XEXP (op, 0);
573
574 return (memory_address_p (mode, op)
575 && !IS_INDEX_ADDR_P (op)
576 && !IS_LO_SUM_DLT_ADDR_P (op));
577 }
578
579 /* Accept anything that can be used as a source operand for a move
580 instruction. */
581 int
582 move_src_operand (rtx op, enum machine_mode mode)
583 {
584 if (register_operand (op, mode))
585 return 1;
586
587 if (GET_CODE (op) == CONST_INT)
588 return cint_ok_for_move (INTVAL (op));
589
590 if (GET_MODE (op) != mode)
591 return 0;
592
593 if (GET_CODE (op) == SUBREG)
594 op = SUBREG_REG (op);
595
596 if (GET_CODE (op) != MEM)
597 return 0;
598
599 /* Until problems with management of the REG_POINTER flag are resolved,
600 we need to delay creating move insns with unscaled indexed addresses
601 until CSE is not expected. */
602 if (!TARGET_NO_SPACE_REGS
603 && !cse_not_expected
604 && GET_CODE (XEXP (op, 0)) == PLUS
605 && REG_P (XEXP (XEXP (op, 0), 0))
606 && REG_P (XEXP (XEXP (op, 0), 1)))
607 return 0;
608
609 return memory_address_p (mode, XEXP (op, 0));
610 }
611
612 /* Accept anything that can be used as the source operand for a prefetch
613 instruction. */
614 int
615 prefetch_operand (rtx op, enum machine_mode mode)
616 {
617 if (GET_CODE (op) != MEM)
618 return 0;
619
620 /* Until problems with management of the REG_POINTER flag are resolved,
621 we need to delay creating prefetch insns with unscaled indexed addresses
622 until CSE is not expected. */
623 if (!TARGET_NO_SPACE_REGS
624 && !cse_not_expected
625 && GET_CODE (XEXP (op, 0)) == PLUS
626 && REG_P (XEXP (XEXP (op, 0), 0))
627 && REG_P (XEXP (XEXP (op, 0), 1)))
628 return 0;
629
630 return memory_address_p (mode, XEXP (op, 0));
631 }
632
633 /* Accept REG and any CONST_INT that can be moved in one instruction into a
634 general register. */
635 int
636 reg_or_cint_move_operand (rtx op, enum machine_mode mode)
637 {
638 if (register_operand (op, mode))
639 return 1;
640
641 return (GET_CODE (op) == CONST_INT && cint_ok_for_move (INTVAL (op)));
642 }
643
644 int
645 pic_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
646 {
647 if (!flag_pic)
648 return 0;
649
650 switch (GET_CODE (op))
651 {
652 case LABEL_REF:
653 return 1;
654 case CONST:
655 op = XEXP (op, 0);
656 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
657 && GET_CODE (XEXP (op, 1)) == CONST_INT);
658 default:
659 return 0;
660 }
661 }
662
663 int
664 fp_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
665 {
666 return reg_renumber && FP_REG_P (op);
667 }
668
669 \f
670
671 /* Return truth value of whether OP can be used as an operand in a
672 three operand arithmetic insn that accepts registers of mode MODE
673 or 14-bit signed integers. */
674 int
675 arith_operand (rtx op, enum machine_mode mode)
676 {
677 return (register_operand (op, mode)
678 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
679 }
680
681 /* Return truth value of whether OP can be used as an operand in a
682 three operand arithmetic insn that accepts registers of mode MODE
683 or 11-bit signed integers. */
684 int
685 arith11_operand (rtx op, enum machine_mode mode)
686 {
687 return (register_operand (op, mode)
688 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
689 }
690
 691 /* Return truth value of whether OP can be used as an operand in an
692 adddi3 insn. */
693 int
694 adddi3_operand (rtx op, enum machine_mode mode)
695 {
696 return (register_operand (op, mode)
697 || (GET_CODE (op) == CONST_INT
698 && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
699 }
700
701 /* A constant integer suitable for use in a PRE_MODIFY memory
702 reference. */
703 int
704 pre_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
705 {
706 return (GET_CODE (op) == CONST_INT
707 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
708 }
709
710 /* A constant integer suitable for use in a POST_MODIFY memory
711 reference. */
712 int
713 post_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
714 {
715 return (GET_CODE (op) == CONST_INT
716 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
717 }
718
719 int
720 arith_double_operand (rtx op, enum machine_mode mode)
721 {
722 return (register_operand (op, mode)
723 || (GET_CODE (op) == CONST_DOUBLE
724 && GET_MODE (op) == mode
725 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
726 && ((CONST_DOUBLE_HIGH (op) >= 0)
727 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
728 }
729
730 /* Return truth value of whether OP is an integer which fits the
731 range constraining immediate operands in three-address insns, or
732 is an integer register. */
733
734 int
735 ireg_or_int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
736 {
737 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
738 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
739 }
740
741 /* Return nonzero if OP is an integer register, else return zero. */
742 int
743 ireg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
744 {
745 return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
746 }
747
748 /* Return truth value of whether OP is an integer which fits the
749 range constraining immediate operands in three-address insns. */
750
751 int
752 int5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
753 {
754 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
755 }
756
757 int
758 uint5_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
759 {
760 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
761 }
762
763 int
764 int11_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
765 {
766 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
767 }
768
769 int
770 uint32_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
771 {
772 #if HOST_BITS_PER_WIDE_INT > 32
773 /* All allowed constants will fit a CONST_INT. */
774 return (GET_CODE (op) == CONST_INT
775 && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
776 #else
777 return (GET_CODE (op) == CONST_INT
778 || (GET_CODE (op) == CONST_DOUBLE
779 && CONST_DOUBLE_HIGH (op) == 0));
780 #endif
781 }
782
783 int
784 arith5_operand (rtx op, enum machine_mode mode)
785 {
786 return register_operand (op, mode) || int5_operand (op, mode);
787 }
788
789 /* True iff zdepi can be used to generate this CONST_INT.
790 zdepi first sign extends a 5 bit signed number to a given field
791 length, then places this field anywhere in a zero. */
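      /* A worked example of the test below: 15 << 5 == 0x1e0 is
         accepted, since lsb_mask == 0x20 and t == ((0x1e + 0x20)
         & ~0x1f) == 0x20, a power of two.  0x111 (bits 8, 4 and 0
         set) is rejected, since t == 0x12, which is not.  */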
792 int
793 zdepi_cint_p (unsigned HOST_WIDE_INT x)
794 {
795 unsigned HOST_WIDE_INT lsb_mask, t;
796
797 /* This might not be obvious, but it's at least fast.
 798      This function is critical; we don't have the time a loop would take.  */
799 lsb_mask = x & -x;
800 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
801 /* Return true iff t is a power of two. */
802 return ((t & (t - 1)) == 0);
803 }
804
805 /* True iff depi or extru can be used to compute (reg & mask).
806 Accept bit pattern like these:
807 0....01....1
808 1....10....0
809 1..10..01..1 */
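      /* For example, ~0xff0 (form 1..10..01..1) is accepted: ~mask
         == 0xff0, and adding its low bit 0x10 yields 0x1000, a power
         of two.  A mask of 0xff (form 0....01....1) passes via
         unsigned wraparound: ~mask + 0x100 overflows to zero.  */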
810 int
811 and_mask_p (unsigned HOST_WIDE_INT mask)
812 {
813 mask = ~mask;
814 mask += mask & -mask;
815 return (mask & (mask - 1)) == 0;
816 }
817
818 /* True iff depi or extru can be used to compute (reg & OP). */
819 int
820 and_operand (rtx op, enum machine_mode mode)
821 {
822 return (register_operand (op, mode)
823 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
824 }
825
826 /* True iff depi can be used to compute (reg | MASK). */
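      /* That is, MASK must be a single contiguous run of ones.  For
         example, 0x3e0 plus its low bit 0x20 gives 0x400, a power of
         two, so it is accepted; 0x111 is not contiguous and fails.  */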
827 int
828 ior_mask_p (unsigned HOST_WIDE_INT mask)
829 {
830 mask += mask & -mask;
831 return (mask & (mask - 1)) == 0;
832 }
833
834 /* True iff depi can be used to compute (reg | OP). */
835 int
836 ior_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
837 {
838 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
839 }
840
841 int
842 lhs_lshift_operand (rtx op, enum machine_mode mode)
843 {
844 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
845 }
846
847 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
848 Such values can be the left hand side x in (x << r), using the zvdepi
849 instruction. */
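      /* Only the bits above the low four matter: 0x3f7 is accepted
         since 0x3f7 >> 4 == 0x3f and 0x3f & 0x40 == 0, while 0x517
         is rejected since 0x51 & 0x52 != 0.  */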
850 int
851 lhs_lshift_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
852 {
853 unsigned HOST_WIDE_INT x;
854 if (GET_CODE (op) != CONST_INT)
855 return 0;
856 x = INTVAL (op) >> 4;
857 return (x & (x + 1)) == 0;
858 }
859
860 int
861 arith32_operand (rtx op, enum machine_mode mode)
862 {
863 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
864 }
865
866 int
867 pc_or_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
868 {
869 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
870 }
871 \f
872 /* Legitimize PIC addresses. If the address is already
873 position-independent, we return ORIG. Newly generated
874 position-independent addresses go to REG. If we need more
875 than one register, we lose. */
876
877 rtx
878 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
879 {
880 rtx pic_ref = orig;
881
882 /* Labels need special handling. */
883 if (pic_label_operand (orig, mode))
884 {
885 /* We do not want to go through the movXX expanders here since that
886 would create recursion.
887
888 Nor do we really want to call a generator for a named pattern
889 since that requires multiple patterns if we want to support
890 multiple word sizes.
891
892 So instead we just emit the raw set, which avoids the movXX
893 expanders completely. */
894 mark_reg_pointer (reg, BITS_PER_UNIT);
895 emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
896 current_function_uses_pic_offset_table = 1;
897 return reg;
898 }
899 if (GET_CODE (orig) == SYMBOL_REF)
900 {
901 rtx insn, tmp_reg;
902
903 if (reg == 0)
904 abort ();
905
906 /* Before reload, allocate a temporary register for the intermediate
907 result. This allows the sequence to be deleted when the final
908 result is unused and the insns are trivially dead. */
909 tmp_reg = ((reload_in_progress || reload_completed)
910 ? reg : gen_reg_rtx (Pmode));
911
912 emit_move_insn (tmp_reg,
913 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
914 gen_rtx_HIGH (word_mode, orig)));
915 pic_ref
916 = gen_rtx_MEM (Pmode,
917 gen_rtx_LO_SUM (Pmode, tmp_reg,
918 gen_rtx_UNSPEC (Pmode,
919 gen_rtvec (1, orig),
920 UNSPEC_DLTIND14R)));
921
922 current_function_uses_pic_offset_table = 1;
923 MEM_NOTRAP_P (pic_ref) = 1;
924 RTX_UNCHANGING_P (pic_ref) = 1;
925 mark_reg_pointer (reg, BITS_PER_UNIT);
926 insn = emit_move_insn (reg, pic_ref);
927
928 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
929 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig, REG_NOTES (insn));
930
931 return reg;
932 }
933 else if (GET_CODE (orig) == CONST)
934 {
935 rtx base;
936
937 if (GET_CODE (XEXP (orig, 0)) == PLUS
938 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
939 return orig;
940
941 if (reg == 0)
942 abort ();
943
944 if (GET_CODE (XEXP (orig, 0)) == PLUS)
945 {
946 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
947 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
948 base == reg ? 0 : reg);
949 }
950 else
951 abort ();
952
953 if (GET_CODE (orig) == CONST_INT)
954 {
955 if (INT_14_BITS (orig))
956 return plus_constant (base, INTVAL (orig));
957 orig = force_reg (Pmode, orig);
958 }
959 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
960 /* Likewise, should we set special REG_NOTEs here? */
961 }
962
963 return pic_ref;
964 }
965
966 /* Try machine-dependent ways of modifying an illegitimate address
967 to be legitimate. If we find one, return the new, valid address.
968 This macro is used in only one place: `memory_address' in explow.c.
969
970 OLDX is the address as it was before break_out_memory_refs was called.
971 In some cases it is useful to look at this to decide what needs to be done.
972
973 MODE and WIN are passed so that this macro can use
974 GO_IF_LEGITIMATE_ADDRESS.
975
976 It is always safe for this macro to do nothing. It exists to recognize
977 opportunities to optimize the output.
978
979 For the PA, transform:
980
981 memory(X + <large int>)
982
983 into:
984
985 if (<large int> & mask) >= 16
986 Y = (<large int> & ~mask) + mask + 1 Round up.
987 else
988 Y = (<large int> & ~mask) Round down.
989 Z = X + Y
990 memory (Z + (<large int> - Y));
991
992 This is for CSE to find several similar references, and only use one Z.
993
 994    X can either be a SYMBOL_REF or REG, but because combine cannot
 995    perform a 4->2 combination, we do nothing for SYMBOL_REF + D where
996 D will not fit in 14 bits.
997
998 MODE_FLOAT references allow displacements which fit in 5 bits, so use
999 0x1f as the mask.
1000
1001 MODE_INT references allow displacements which fit in 14 bits, so use
1002 0x3fff as the mask.
1003
1004 This relies on the fact that most mode MODE_FLOAT references will use FP
1005 registers and most mode MODE_INT references will use integer registers.
1006 (In the rare case of an FP register used in an integer MODE, we depend
1007 on secondary reloads to clean things up.)
1008
1009
1010 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1011 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
1012 addressing modes to be used).
1013
1014 Put X and Z into registers. Then put the entire expression into
1015 a register. */
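      /* A concrete MODE_INT example: for memory (X + 0x5432), the
         mask is 0x3fff and (0x5432 & 0x3fff) == 0x1432 < 0x2000, so
         we round down to Y == 0x4000.  The address becomes
         (X + 0x4000) + 0x1432, and the residual 0x1432 fits in the
         14-bit displacement of an ldo.  */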
1016
1017 rtx
1018 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1019 enum machine_mode mode)
1020 {
1021 rtx orig = x;
1022
1023 /* We need to canonicalize the order of operands in unscaled indexed
1024 addresses since the code that checks if an address is valid doesn't
1025 always try both orders. */
1026 if (!TARGET_NO_SPACE_REGS
1027 && GET_CODE (x) == PLUS
1028 && GET_MODE (x) == Pmode
1029 && REG_P (XEXP (x, 0))
1030 && REG_P (XEXP (x, 1))
1031 && REG_POINTER (XEXP (x, 0))
1032 && !REG_POINTER (XEXP (x, 1)))
1033 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1034
1035 if (flag_pic)
1036 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1037
1038 /* Strip off CONST. */
1039 if (GET_CODE (x) == CONST)
1040 x = XEXP (x, 0);
1041
1042 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1043 That should always be safe. */
1044 if (GET_CODE (x) == PLUS
1045 && GET_CODE (XEXP (x, 0)) == REG
1046 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1047 {
1048 rtx reg = force_reg (Pmode, XEXP (x, 1));
1049 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
1050 }
1051
1052 /* Note we must reject symbols which represent function addresses
1053 since the assembler/linker can't handle arithmetic on plabels. */
1054 if (GET_CODE (x) == PLUS
1055 && GET_CODE (XEXP (x, 1)) == CONST_INT
1056 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1057 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
1058 || GET_CODE (XEXP (x, 0)) == REG))
1059 {
1060 rtx int_part, ptr_reg;
1061 int newoffset;
1062 int offset = INTVAL (XEXP (x, 1));
1063 int mask;
1064
1065 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1066 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
1067
1068 /* Choose which way to round the offset. Round up if we
1069 are >= halfway to the next boundary. */
1070 if ((offset & mask) >= ((mask + 1) / 2))
1071 newoffset = (offset & ~ mask) + mask + 1;
1072 else
1073 newoffset = (offset & ~ mask);
1074
1075 /* If the newoffset will not fit in 14 bits (ldo), then
1076 handling this would take 4 or 5 instructions (2 to load
1077 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
 1078          add the new offset and the SYMBOL_REF.)  Combine cannot
 1079          handle 4->2 or 5->2 combinations, so do not create
1080 them. */
1081 if (! VAL_14_BITS_P (newoffset)
1082 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1083 {
1084 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
1085 rtx tmp_reg
1086 = force_reg (Pmode,
1087 gen_rtx_HIGH (Pmode, const_part));
1088 ptr_reg
1089 = force_reg (Pmode,
1090 gen_rtx_LO_SUM (Pmode,
1091 tmp_reg, const_part));
1092 }
1093 else
1094 {
1095 if (! VAL_14_BITS_P (newoffset))
1096 int_part = force_reg (Pmode, GEN_INT (newoffset));
1097 else
1098 int_part = GEN_INT (newoffset);
1099
1100 ptr_reg = force_reg (Pmode,
1101 gen_rtx_PLUS (Pmode,
1102 force_reg (Pmode, XEXP (x, 0)),
1103 int_part));
1104 }
1105 return plus_constant (ptr_reg, offset - newoffset);
1106 }
1107
1108 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1109
1110 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1111 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1112 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1113 && (OBJECT_P (XEXP (x, 1))
1114 || GET_CODE (XEXP (x, 1)) == SUBREG)
1115 && GET_CODE (XEXP (x, 1)) != CONST)
1116 {
1117 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1118 rtx reg1, reg2;
1119
1120 reg1 = XEXP (x, 1);
1121 if (GET_CODE (reg1) != REG)
1122 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1123
1124 reg2 = XEXP (XEXP (x, 0), 0);
1125 if (GET_CODE (reg2) != REG)
1126 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1127
1128 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1129 gen_rtx_MULT (Pmode,
1130 reg2,
1131 GEN_INT (val)),
1132 reg1));
1133 }
1134
1135 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1136
1137 Only do so for floating point modes since this is more speculative
1138 and we lose if it's an integer store. */
1139 if (GET_CODE (x) == PLUS
1140 && GET_CODE (XEXP (x, 0)) == PLUS
1141 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1142 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1143 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1144 && (mode == SFmode || mode == DFmode))
1145 {
1146
 1147       /* First, try to figure out what to use as a base register.  */
1148 rtx reg1, reg2, base, idx, orig_base;
1149
1150 reg1 = XEXP (XEXP (x, 0), 1);
1151 reg2 = XEXP (x, 1);
1152 base = NULL_RTX;
1153 idx = NULL_RTX;
1154
1155 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1156 then emit_move_sequence will turn on REG_POINTER so we'll know
1157 it's a base register below. */
1158 if (GET_CODE (reg1) != REG)
1159 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1160
1161 if (GET_CODE (reg2) != REG)
1162 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1163
1164 /* Figure out what the base and index are. */
1165
1166 if (GET_CODE (reg1) == REG
1167 && REG_POINTER (reg1))
1168 {
1169 base = reg1;
1170 orig_base = XEXP (XEXP (x, 0), 1);
1171 idx = gen_rtx_PLUS (Pmode,
1172 gen_rtx_MULT (Pmode,
1173 XEXP (XEXP (XEXP (x, 0), 0), 0),
1174 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1175 XEXP (x, 1));
1176 }
1177 else if (GET_CODE (reg2) == REG
1178 && REG_POINTER (reg2))
1179 {
1180 base = reg2;
1181 orig_base = XEXP (x, 1);
1182 idx = XEXP (x, 0);
1183 }
1184
1185 if (base == 0)
1186 return orig;
1187
1188 /* If the index adds a large constant, try to scale the
1189 constant so that it can be loaded with only one insn. */
1190 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1191 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1192 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1193 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1194 {
1195 /* Divide the CONST_INT by the scale factor, then add it to A. */
1196 int val = INTVAL (XEXP (idx, 1));
1197
1198 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1199 reg1 = XEXP (XEXP (idx, 0), 0);
1200 if (GET_CODE (reg1) != REG)
1201 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1202
1203 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1204
1205 /* We can now generate a simple scaled indexed address. */
1206 return
1207 force_reg
1208 (Pmode, gen_rtx_PLUS (Pmode,
1209 gen_rtx_MULT (Pmode, reg1,
1210 XEXP (XEXP (idx, 0), 1)),
1211 base));
1212 }
1213
1214 /* If B + C is still a valid base register, then add them. */
1215 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1216 && INTVAL (XEXP (idx, 1)) <= 4096
1217 && INTVAL (XEXP (idx, 1)) >= -4096)
1218 {
1219 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1220 rtx reg1, reg2;
1221
1222 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1223
1224 reg2 = XEXP (XEXP (idx, 0), 0);
1225 if (GET_CODE (reg2) != CONST_INT)
1226 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1227
1228 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1229 gen_rtx_MULT (Pmode,
1230 reg2,
1231 GEN_INT (val)),
1232 reg1));
1233 }
1234
1235 /* Get the index into a register, then add the base + index and
1236 return a register holding the result. */
1237
1238 /* First get A into a register. */
1239 reg1 = XEXP (XEXP (idx, 0), 0);
1240 if (GET_CODE (reg1) != REG)
1241 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1242
1243 /* And get B into a register. */
1244 reg2 = XEXP (idx, 1);
1245 if (GET_CODE (reg2) != REG)
1246 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1247
1248 reg1 = force_reg (Pmode,
1249 gen_rtx_PLUS (Pmode,
1250 gen_rtx_MULT (Pmode, reg1,
1251 XEXP (XEXP (idx, 0), 1)),
1252 reg2));
1253
1254 /* Add the result to our base register and return. */
1255 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1256
1257 }
1258
1259 /* Uh-oh. We might have an address for x[n-100000]. This needs
1260 special handling to avoid creating an indexed memory address
1261 with x-100000 as the base.
1262
1263 If the constant part is small enough, then it's still safe because
1264 there is a guard page at the beginning and end of the data segment.
1265
 1266      Scaled references are common enough that we want to try to rearrange the
 1267      terms so that we can use indexing for these addresses too.  Only
 1268      do the optimization for floating point modes.  */
1269
1270 if (GET_CODE (x) == PLUS
1271 && symbolic_expression_p (XEXP (x, 1)))
1272 {
1273 /* Ugly. We modify things here so that the address offset specified
1274 by the index expression is computed first, then added to x to form
1275 the entire address. */
1276
1277 rtx regx1, regx2, regy1, regy2, y;
1278
1279 /* Strip off any CONST. */
1280 y = XEXP (x, 1);
1281 if (GET_CODE (y) == CONST)
1282 y = XEXP (y, 0);
1283
1284 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1285 {
1286 /* See if this looks like
1287 (plus (mult (reg) (shadd_const))
1288 (const (plus (symbol_ref) (const_int))))
1289
1290 Where const_int is small. In that case the const
1291 expression is a valid pointer for indexing.
1292
 1293          If const_int is big but divides evenly by shadd_const, it can be
 1294          divided and added to (reg), allowing more scaled indexed addresses.  */
1295 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1296 && GET_CODE (XEXP (x, 0)) == MULT
1297 && GET_CODE (XEXP (y, 1)) == CONST_INT
1298 && INTVAL (XEXP (y, 1)) >= -4096
1299 && INTVAL (XEXP (y, 1)) <= 4095
1300 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1301 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1302 {
1303 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1304 rtx reg1, reg2;
1305
1306 reg1 = XEXP (x, 1);
1307 if (GET_CODE (reg1) != REG)
1308 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1309
1310 reg2 = XEXP (XEXP (x, 0), 0);
1311 if (GET_CODE (reg2) != REG)
1312 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1313
1314 return force_reg (Pmode,
1315 gen_rtx_PLUS (Pmode,
1316 gen_rtx_MULT (Pmode,
1317 reg2,
1318 GEN_INT (val)),
1319 reg1));
1320 }
1321 else if ((mode == DFmode || mode == SFmode)
1322 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1323 && GET_CODE (XEXP (x, 0)) == MULT
1324 && GET_CODE (XEXP (y, 1)) == CONST_INT
1325 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1326 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1327 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1328 {
1329 regx1
1330 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1331 / INTVAL (XEXP (XEXP (x, 0), 1))));
1332 regx2 = XEXP (XEXP (x, 0), 0);
1333 if (GET_CODE (regx2) != REG)
1334 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1335 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1336 regx2, regx1));
1337 return
1338 force_reg (Pmode,
1339 gen_rtx_PLUS (Pmode,
1340 gen_rtx_MULT (Pmode, regx2,
1341 XEXP (XEXP (x, 0), 1)),
1342 force_reg (Pmode, XEXP (y, 0))));
1343 }
1344 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1345 && INTVAL (XEXP (y, 1)) >= -4096
1346 && INTVAL (XEXP (y, 1)) <= 4095)
1347 {
1348 /* This is safe because of the guard page at the
1349 beginning and end of the data space. Just
1350 return the original address. */
1351 return orig;
1352 }
1353 else
1354 {
1355 /* Doesn't look like one we can optimize. */
1356 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1357 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1358 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1359 regx1 = force_reg (Pmode,
1360 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1361 regx1, regy2));
1362 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1363 }
1364 }
1365 }
1366
1367 return orig;
1368 }
1369
 1370 /* For the HPPA, REG and REG+CONST addresses are cost 1,
 1371    and addresses involving symbolic constants cost at least 2.
1372
1373 PIC addresses are very expensive.
1374
1375 It is no coincidence that this has the same structure
1376 as GO_IF_LEGITIMATE_ADDRESS. */
1377
1378 static int
1379 hppa_address_cost (rtx X)
1380 {
1381 switch (GET_CODE (X))
1382 {
1383 case REG:
1384 case PLUS:
1385 case LO_SUM:
1386 return 1;
1387 case HIGH:
1388 return 2;
1389 default:
1390 return 4;
1391 }
1392 }
1393
1394 /* Compute a (partial) cost for rtx X. Return true if the complete
1395 cost has been computed, and false if subexpressions should be
1396 scanned. In either case, *TOTAL contains the cost result. */
1397
1398 static bool
1399 hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
1400 {
1401 switch (code)
1402 {
1403 case CONST_INT:
1404 if (INTVAL (x) == 0)
1405 *total = 0;
1406 else if (INT_14_BITS (x))
1407 *total = 1;
1408 else
1409 *total = 2;
1410 return true;
1411
1412 case HIGH:
1413 *total = 2;
1414 return true;
1415
1416 case CONST:
1417 case LABEL_REF:
1418 case SYMBOL_REF:
1419 *total = 4;
1420 return true;
1421
1422 case CONST_DOUBLE:
1423 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1424 && outer_code != SET)
1425 *total = 0;
1426 else
1427 *total = 8;
1428 return true;
1429
1430 case MULT:
1431 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1432 *total = COSTS_N_INSNS (3);
1433 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1434 *total = COSTS_N_INSNS (8);
1435 else
1436 *total = COSTS_N_INSNS (20);
1437 return true;
1438
1439 case DIV:
1440 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1441 {
1442 *total = COSTS_N_INSNS (14);
1443 return true;
1444 }
1445 /* FALLTHRU */
1446
1447 case UDIV:
1448 case MOD:
1449 case UMOD:
1450 *total = COSTS_N_INSNS (60);
1451 return true;
1452
1453 case PLUS: /* this includes shNadd insns */
1454 case MINUS:
1455 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1456 *total = COSTS_N_INSNS (3);
1457 else
1458 *total = COSTS_N_INSNS (1);
1459 return true;
1460
1461 case ASHIFT:
1462 case ASHIFTRT:
1463 case LSHIFTRT:
1464 *total = COSTS_N_INSNS (1);
1465 return true;
1466
1467 default:
1468 return false;
1469 }
1470 }
1471
1472 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1473 new rtx with the correct mode. */
1474 static inline rtx
1475 force_mode (enum machine_mode mode, rtx orig)
1476 {
1477 if (mode == GET_MODE (orig))
1478 return orig;
1479
1480 if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
1481 abort ();
1482
1483 return gen_rtx_REG (mode, REGNO (orig));
1484 }
1485
1486 /* Emit insns to move operands[1] into operands[0].
1487
1488 Return 1 if we have written out everything that needs to be done to
1489 do the move. Otherwise, return 0 and the caller will emit the move
1490 normally.
1491
1492 Note SCRATCH_REG may not be in the proper mode depending on how it
1493 will be used. This routine is responsible for creating a new copy
1494 of SCRATCH_REG in the proper mode. */
1495
1496 int
1497 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1498 {
1499 register rtx operand0 = operands[0];
1500 register rtx operand1 = operands[1];
1501 register rtx tem;
1502
1503 /* We can only handle indexed addresses in the destination operand
1504 of floating point stores. Thus, we need to break out indexed
1505 addresses from the destination operand. */
1506 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1507 {
1508 /* This is only safe up to the beginning of life analysis. */
1509 if (no_new_pseudos)
1510 abort ();
1511
1512 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1513 operand0 = replace_equiv_address (operand0, tem);
1514 }
1515
1516 /* On targets with non-equivalent space registers, break out unscaled
1517 indexed addresses from the source operand before the final CSE.
1518 We have to do this because the REG_POINTER flag is not correctly
1519 carried through various optimization passes and CSE may substitute
1520 a pseudo without the pointer set for one with the pointer set. As
 1521      a result, we lose various opportunities to create insns with
1522 unscaled indexed addresses. */
1523 if (!TARGET_NO_SPACE_REGS
1524 && !cse_not_expected
1525 && GET_CODE (operand1) == MEM
1526 && GET_CODE (XEXP (operand1, 0)) == PLUS
1527 && REG_P (XEXP (XEXP (operand1, 0), 0))
1528 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1529 operand1
1530 = replace_equiv_address (operand1,
1531 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1532
1533 if (scratch_reg
1534 && reload_in_progress && GET_CODE (operand0) == REG
1535 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1536 operand0 = reg_equiv_mem[REGNO (operand0)];
1537 else if (scratch_reg
1538 && reload_in_progress && GET_CODE (operand0) == SUBREG
1539 && GET_CODE (SUBREG_REG (operand0)) == REG
1540 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1541 {
1542 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1543 the code which tracks sets/uses for delete_output_reload. */
1544 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1545 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1546 SUBREG_BYTE (operand0));
1547 operand0 = alter_subreg (&temp);
1548 }
1549
1550 if (scratch_reg
1551 && reload_in_progress && GET_CODE (operand1) == REG
1552 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1553 operand1 = reg_equiv_mem[REGNO (operand1)];
1554 else if (scratch_reg
1555 && reload_in_progress && GET_CODE (operand1) == SUBREG
1556 && GET_CODE (SUBREG_REG (operand1)) == REG
1557 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1558 {
 1559       /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1560 the code which tracks sets/uses for delete_output_reload. */
1561 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1562 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1563 SUBREG_BYTE (operand1));
1564 operand1 = alter_subreg (&temp);
1565 }
1566
1567 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1568 && ((tem = find_replacement (&XEXP (operand0, 0)))
1569 != XEXP (operand0, 0)))
1570 operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
1571
1572 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1573 && ((tem = find_replacement (&XEXP (operand1, 0)))
1574 != XEXP (operand1, 0)))
1575 operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
1576
1577 /* Handle secondary reloads for loads/stores of FP registers from
1578 REG+D addresses where D does not fit in 5 or 14 bits, including
1579 (subreg (mem (addr))) cases. */
1580 if (scratch_reg
1581 && fp_reg_operand (operand0, mode)
1582 && ((GET_CODE (operand1) == MEM
1583 && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1584 XEXP (operand1, 0)))
1585 || ((GET_CODE (operand1) == SUBREG
1586 && GET_CODE (XEXP (operand1, 0)) == MEM
1587 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1588 ? SFmode : DFmode),
1589 XEXP (XEXP (operand1, 0), 0))))))
1590 {
1591 if (GET_CODE (operand1) == SUBREG)
1592 operand1 = XEXP (operand1, 0);
1593
1594 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1595 it in WORD_MODE regardless of what mode it was originally given
1596 to us. */
1597 scratch_reg = force_mode (word_mode, scratch_reg);
1598
1599 /* D might not fit in 14 bits either; for such cases load D into
1600 scratch reg. */
1601 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1602 {
1603 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1604 emit_move_insn (scratch_reg,
1605 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1606 Pmode,
1607 XEXP (XEXP (operand1, 0), 0),
1608 scratch_reg));
1609 }
1610 else
1611 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1612 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1613 gen_rtx_MEM (mode, scratch_reg)));
1614 return 1;
1615 }
1616 else if (scratch_reg
1617 && fp_reg_operand (operand1, mode)
1618 && ((GET_CODE (operand0) == MEM
1619 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1620 ? SFmode : DFmode),
1621 XEXP (operand0, 0)))
1622 || ((GET_CODE (operand0) == SUBREG)
1623 && GET_CODE (XEXP (operand0, 0)) == MEM
1624 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1625 ? SFmode : DFmode),
1626 XEXP (XEXP (operand0, 0), 0)))))
1627 {
1628 if (GET_CODE (operand0) == SUBREG)
1629 operand0 = XEXP (operand0, 0);
1630
1631 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1632 it in WORD_MODE regardless of what mode it was originally given
1633 to us. */
1634 scratch_reg = force_mode (word_mode, scratch_reg);
1635
1636 /* D might not fit in 14 bits either; for such cases load D into
1637 scratch reg. */
1638 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1639 {
1640 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1641 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1642 0)),
1643 Pmode,
1644 XEXP (XEXP (operand0, 0),
1645 0),
1646 scratch_reg));
1647 }
1648 else
1649 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1650 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
1651 operand1));
1652 return 1;
1653 }
1654 /* Handle secondary reloads for loads of FP registers from constant
1655 expressions by forcing the constant into memory.
1656
1657 Use scratch_reg to hold the address of the memory location.
1658
1659 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1660 NO_REGS when presented with a const_int and a register class
1661 containing only FP registers. Doing so unfortunately creates
1662 more problems than it solves. Fix this for 2.5. */
1663 else if (scratch_reg
1664 && CONSTANT_P (operand1)
1665 && fp_reg_operand (operand0, mode))
1666 {
1667 rtx xoperands[2];
1668
1669 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1670 it in WORD_MODE regardless of what mode it was originally given
1671 to us. */
1672 scratch_reg = force_mode (word_mode, scratch_reg);
1673
1674 /* Force the constant into memory and put the address of the
1675 memory location into scratch_reg. */
1676 xoperands[0] = scratch_reg;
1677 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1678 emit_move_sequence (xoperands, Pmode, 0);
1679
1680 /* Now load the destination register. */
1681 emit_insn (gen_rtx_SET (mode, operand0,
1682 gen_rtx_MEM (mode, scratch_reg)));
1683 return 1;
1684 }
1685 /* Handle secondary reloads for SAR. These occur when trying to load
1686 the SAR from memory, FP register, or with a constant. */
1687 else if (scratch_reg
1688 && GET_CODE (operand0) == REG
1689 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1690 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1691 && (GET_CODE (operand1) == MEM
1692 || GET_CODE (operand1) == CONST_INT
1693 || (GET_CODE (operand1) == REG
1694 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1695 {
1696 /* D might not fit in 14 bits either; for such cases load D into
1697 scratch reg. */
1698 if (GET_CODE (operand1) == MEM
1699 && !memory_address_p (Pmode, XEXP (operand1, 0)))
1700 {
1701 /* We are reloading the address into the scratch register, so we
1702 want to make sure the scratch register is a full register. */
1703 scratch_reg = force_mode (word_mode, scratch_reg);
1704
1705 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1706 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1707 0)),
1708 Pmode,
1709 XEXP (XEXP (operand1, 0),
1710 0),
1711 scratch_reg));
1712
1713 /* Now we are going to load the scratch register from memory,
1714 we want to load it in the same width as the original MEM,
1715 which must be the same as the width of the ultimate destination,
1716 OPERAND0. */
1717 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1718
1719 emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
1720 scratch_reg));
1721 }
1722 else
1723 {
1724 /* We want to load the scratch register using the same mode as
1725 the ultimate destination. */
1726 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1727
1728 emit_move_insn (scratch_reg, operand1);
1729 }
1730
1731 /* And emit the insn to set the ultimate destination. We know that
1732 the scratch register has the same mode as the destination at this
1733 point. */
1734 emit_move_insn (operand0, scratch_reg);
1735 return 1;
1736 }
1737 /* Handle the most common case: storing into a register. */
1738 else if (register_operand (operand0, mode))
1739 {
1740 if (register_operand (operand1, mode)
1741 || (GET_CODE (operand1) == CONST_INT
1742 && cint_ok_for_move (INTVAL (operand1)))
1743 || (operand1 == CONST0_RTX (mode))
1744 || (GET_CODE (operand1) == HIGH
1745 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1746 /* Only `general_operands' can come here, so MEM is ok. */
1747 || GET_CODE (operand1) == MEM)
1748 {
1749 /* Various sets are created during RTL generation which don't
1750 have the REG_POINTER flag correctly set. After the CSE pass,
1751 instruction recognition can fail if we don't consistently
1752 set this flag when performing register copies. This should
1753 also improve the opportunities for creating insns that use
1754 unscaled indexing. */
1755 if (REG_P (operand0) && REG_P (operand1))
1756 {
1757 if (REG_POINTER (operand1)
1758 && !REG_POINTER (operand0)
1759 && !HARD_REGISTER_P (operand0))
1760 copy_reg_pointer (operand0, operand1);
1761 else if (REG_POINTER (operand0)
1762 && !REG_POINTER (operand1)
1763 && !HARD_REGISTER_P (operand1))
1764 copy_reg_pointer (operand1, operand0);
1765 }
1766
1767 /* When MEMs are broken out, the REG_POINTER flag doesn't
1768 get set. In some cases, we can set the REG_POINTER flag
1769 from the declaration for the MEM. */
1770 if (REG_P (operand0)
1771 && GET_CODE (operand1) == MEM
1772 && !REG_POINTER (operand0))
1773 {
1774 tree decl = MEM_EXPR (operand1);
1775
1776 /* Set the register pointer flag and register alignment
1777 if the declaration for this memory reference is a
1778 pointer type. Fortran indirect argument references
1779 are ignored. */
1780 if (decl
1781 && !(flag_argument_noalias > 1
1782 && TREE_CODE (decl) == INDIRECT_REF
1783 && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
1784 {
1785 tree type;
1786
1787 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1788 tree operand 1. */
1789 if (TREE_CODE (decl) == COMPONENT_REF)
1790 decl = TREE_OPERAND (decl, 1);
1791
1792 type = TREE_TYPE (decl);
1793 if (TREE_CODE (type) == ARRAY_TYPE)
1794 type = get_inner_array_type (type);
1795
1796 if (POINTER_TYPE_P (type))
1797 {
1798 int align;
1799
1800 type = TREE_TYPE (type);
1801 /* Using TYPE_ALIGN_OK is rather conservative as
1802 only the Ada front end actually sets it. */
1803 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1804 : BITS_PER_UNIT);
1805 mark_reg_pointer (operand0, align);
1806 }
1807 }
1808 }
1809
1810 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1811 return 1;
1812 }
1813 }
1814 else if (GET_CODE (operand0) == MEM)
1815 {
1816 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1817 && !(reload_in_progress || reload_completed))
1818 {
1819 rtx temp = gen_reg_rtx (DFmode);
1820
1821 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1822 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1823 return 1;
1824 }
1825 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1826 {
1827 /* Run this case quickly. */
1828 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1829 return 1;
1830 }
1831 if (! (reload_in_progress || reload_completed))
1832 {
1833 operands[0] = validize_mem (operand0);
1834 operands[1] = operand1 = force_reg (mode, operand1);
1835 }
1836 }
1837
1838 /* Simplify the source if we need to.
1839 Note we do have to handle function labels here, even though we do
1840 not consider them legitimate constants. Loop optimizations can
1841 call emit_move_xxx with one as a source. */
1842 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1843 || function_label_operand (operand1, mode)
1844 || (GET_CODE (operand1) == HIGH
1845 && symbolic_operand (XEXP (operand1, 0), mode)))
1846 {
1847 int ishighonly = 0;
1848
1849 if (GET_CODE (operand1) == HIGH)
1850 {
1851 ishighonly = 1;
1852 operand1 = XEXP (operand1, 0);
1853 }
1854 if (symbolic_operand (operand1, mode))
1855 {
1856 /* Argh. The assembler and linker can't handle arithmetic
1857 involving plabels.
1858
1859 So we force the plabel into memory, load operand0 from
1860 the memory location, then add in the constant part. */
1861 if ((GET_CODE (operand1) == CONST
1862 && GET_CODE (XEXP (operand1, 0)) == PLUS
1863 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1864 || function_label_operand (operand1, mode))
1865 {
1866 rtx temp, const_part;
1867
1868 /* Figure out what (if any) scratch register to use. */
1869 if (reload_in_progress || reload_completed)
1870 {
1871 scratch_reg = scratch_reg ? scratch_reg : operand0;
1872 /* SCRATCH_REG will hold an address and maybe the actual
1873 data. We want it in WORD_MODE regardless of what mode it
1874 was originally given to us. */
1875 scratch_reg = force_mode (word_mode, scratch_reg);
1876 }
1877 else if (flag_pic)
1878 scratch_reg = gen_reg_rtx (Pmode);
1879
1880 if (GET_CODE (operand1) == CONST)
1881 {
1882 /* Save away the constant part of the expression. */
1883 const_part = XEXP (XEXP (operand1, 0), 1);
1884 if (GET_CODE (const_part) != CONST_INT)
1885 abort ();
1886
1887 /* Force the function label into memory. */
1888 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1889 }
1890 else
1891 {
1892 /* No constant part. */
1893 const_part = NULL_RTX;
1894
1895 /* Force the function label into memory. */
1896 temp = force_const_mem (mode, operand1);
1897 }
1898
1899
1900 /* Get the address of the memory location. PIC-ify it if
1901 necessary. */
1902 temp = XEXP (temp, 0);
1903 if (flag_pic)
1904 temp = legitimize_pic_address (temp, mode, scratch_reg);
1905
1906 /* Put the address of the memory location into our destination
1907 register. */
1908 operands[1] = temp;
1909 emit_move_sequence (operands, mode, scratch_reg);
1910
1911 /* Now load from the memory location into our destination
1912 register. */
1913 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1914 emit_move_sequence (operands, mode, scratch_reg);
1915
1916 /* And add back in the constant part. */
1917 if (const_part != NULL_RTX)
1918 expand_inc (operand0, const_part);
1919
1920 return 1;
1921 }
1922
1923 if (flag_pic)
1924 {
1925 rtx temp;
1926
1927 if (reload_in_progress || reload_completed)
1928 {
1929 temp = scratch_reg ? scratch_reg : operand0;
1930 /* TEMP will hold an address and maybe the actual
1931 data. We want it in WORD_MODE regardless of what mode it
1932 was originally given to us. */
1933 temp = force_mode (word_mode, temp);
1934 }
1935 else
1936 temp = gen_reg_rtx (Pmode);
1937
1938 /* (const (plus (symbol) (const_int))) must be forced to
1939 memory during/after reload if the const_int will not fit
1940 in 14 bits. */
1941 if (GET_CODE (operand1) == CONST
1942 && GET_CODE (XEXP (operand1, 0)) == PLUS
1943 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1944 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1945 && (reload_completed || reload_in_progress)
1946 && flag_pic)
1947 {
1948 operands[1] = force_const_mem (mode, operand1);
1949 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1950 mode, temp);
1951 operands[1] = gen_rtx_MEM (mode, operands[1]);
1952 emit_move_sequence (operands, mode, temp);
1953 }
1954 else
1955 {
1956 operands[1] = legitimize_pic_address (operand1, mode, temp);
1957 if (REG_P (operand0) && REG_P (operands[1]))
1958 copy_reg_pointer (operand0, operands[1]);
1959 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1960 }
1961 }
1962 /* On the HPPA, references to data space are supposed to use dp,
1963 register 27, but showing it in the RTL inhibits various cse
1964 and loop optimizations. */
1965 else
1966 {
1967 rtx temp, set;
1968
1969 if (reload_in_progress || reload_completed)
1970 {
1971 temp = scratch_reg ? scratch_reg : operand0;
1972 /* TEMP will hold an address and maybe the actual
1973 data. We want it in WORD_MODE regardless of what mode it
1974 was originally given to us. */
1975 temp = force_mode (word_mode, temp);
1976 }
1977 else
1978 temp = gen_reg_rtx (mode);
1979
1980 /* Loading a SYMBOL_REF into a register makes that register
1981 safe to be used as the base in an indexed address.
1982
1983 Don't mark hard registers though. That loses. */
1984 if (GET_CODE (operand0) == REG
1985 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1986 mark_reg_pointer (operand0, BITS_PER_UNIT);
1987 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1988 mark_reg_pointer (temp, BITS_PER_UNIT);
1989
1990 if (ishighonly)
1991 set = gen_rtx_SET (mode, operand0, temp);
1992 else
1993 set = gen_rtx_SET (VOIDmode,
1994 operand0,
1995 gen_rtx_LO_SUM (mode, temp, operand1));
1996
1997 emit_insn (gen_rtx_SET (VOIDmode,
1998 temp,
1999 gen_rtx_HIGH (mode, operand1)));
2000 emit_insn (set);
2001
2002 }
2003 return 1;
2004 }
2005 else if (GET_CODE (operand1) != CONST_INT
2006 || !cint_ok_for_move (INTVAL (operand1)))
2007 {
2008 rtx insn, temp;
2009 rtx op1 = operand1;
2010 HOST_WIDE_INT value = 0;
2011 HOST_WIDE_INT insv = 0;
2012 int insert = 0;
2013
2014 if (GET_CODE (operand1) == CONST_INT)
2015 value = INTVAL (operand1);
2016
2017 if (TARGET_64BIT
2018 && GET_CODE (operand1) == CONST_INT
2019 && HOST_BITS_PER_WIDE_INT > 32
2020 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2021 {
2022 HOST_WIDE_INT nval;
2023
2024 /* Extract the low order 32 bits of the value and sign extend.
2025 If the new value is the same as the original value, we can
2026 use the original value as-is. If the new value is
2027 different, we use it and insert the most-significant 32-bits
2028 of the original value into the final result. */
2029 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2030 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2031 if (value != nval)
2032 {
2033 #if HOST_BITS_PER_WIDE_INT > 32
2034 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2035 #endif
2036 insert = 1;
2037 value = nval;
2038 operand1 = GEN_INT (nval);
2039 }
2040 }
2041
2042 if (reload_in_progress || reload_completed)
2043 temp = scratch_reg ? scratch_reg : operand0;
2044 else
2045 temp = gen_reg_rtx (mode);
2046
2047 /* We don't directly split DImode constants on 32-bit targets
2048 because PLUS uses an 11-bit immediate and the insn sequence
2049 generated is not as efficient as the one using HIGH/LO_SUM. */
2050 if (GET_CODE (operand1) == CONST_INT
2051 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2052 && !insert)
2053 {
2054 /* Directly break constant into high and low parts. This
2055 provides better optimization opportunities because various
2056 passes recognize constants split with PLUS but not LO_SUM.
2057 We use a 14-bit signed low part except when the addition
2058 of 0x4000 to the high part might change the sign of the
2059 high part. */
2060 HOST_WIDE_INT low = value & 0x3fff;
2061 HOST_WIDE_INT high = value & ~ 0x3fff;
2062
2063 if (low >= 0x2000)
2064 {
2065 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2066 high += 0x2000;
2067 else
2068 high += 0x4000;
2069 }
2070
2071 low = value - high;
2072
2073 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2074 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2075 }
2076 else
2077 {
2078 emit_insn (gen_rtx_SET (VOIDmode, temp,
2079 gen_rtx_HIGH (mode, operand1)));
2080 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2081 }
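/* A worked example of the CONST_INT split above (illustrative only):
   for value = 0x12345678, low = value & 0x3fff = 0x1678 < 0x2000, so
   high = 0x12344000 and low = 0x1678, giving "ldil L'0x12344000,%t"
   followed by "ldo 0x1678(%t),%dest".  For value = 0x12347fff,
   low = 0x3fff >= 0x2000, so high is bumped to 0x12348000 and low
   becomes -1, keeping the low part within 14 signed bits.  */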
2082
2083 insn = emit_move_insn (operands[0], operands[1]);
2084
2085 /* Now insert the most significant 32 bits of the value
2086 into the register. When we don't have a second register
2087 available, it could take up to nine instructions to load
2088 a 64-bit integer constant. Prior to reload, we force
2089 constants that would take more than three instructions
2090 to load to the constant pool. During and after reload,
2091 we have to handle all possible values. */
2092 if (insert)
2093 {
2094 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2095 register and the value to be inserted is outside the
2096 range that can be loaded with three depdi instructions. */
2097 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2098 {
2099 operand1 = GEN_INT (insv);
2100
2101 emit_insn (gen_rtx_SET (VOIDmode, temp,
2102 gen_rtx_HIGH (mode, operand1)));
2103 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2104 emit_insn (gen_insv (operand0, GEN_INT (32),
2105 const0_rtx, temp));
2106 }
2107 else
2108 {
2109 int len = 5, pos = 27;
2110
2111 /* Insert the bits using the depdi instruction. */
2112 while (pos >= 0)
2113 {
2114 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2115 HOST_WIDE_INT sign = v5 < 0;
2116
2117 /* Left extend the insertion. */
2118 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2119 while (pos > 0 && (insv & 1) == sign)
2120 {
2121 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2122 len += 1;
2123 pos -= 1;
2124 }
2125
2126 emit_insn (gen_insv (operand0, GEN_INT (len),
2127 GEN_INT (pos), GEN_INT (v5)));
2128
2129 len = pos > 0 && pos < 5 ? pos : 5;
2130 pos -= len;
2131 }
2132 }
2133 }
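/* An illustrative trace of the insertion loop above (not from the
   source): for insv = 3, the first iteration computes v5 = 3 with
   sign = 0; the widening loop then absorbs the remaining zero bits,
   growing len to 32 and shrinking pos to 0, so a single depdi
   deposits the whole upper 32-bit field in one instruction.  */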
2134
2135 REG_NOTES (insn)
2136 = gen_rtx_EXPR_LIST (REG_EQUAL, op1, REG_NOTES (insn));
2137
2138 return 1;
2139 }
2140 }
2141 /* Now have insn-emit do whatever it normally does. */
2142 return 0;
2143 }
2144
2145 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2146 it will need a link/runtime reloc). */
2147
2148 int
2149 reloc_needed (tree exp)
2150 {
2151 int reloc = 0;
2152
2153 switch (TREE_CODE (exp))
2154 {
2155 case ADDR_EXPR:
2156 return 1;
2157
2158 case PLUS_EXPR:
2159 case MINUS_EXPR:
2160 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2161 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2162 break;
2163
2164 case NOP_EXPR:
2165 case CONVERT_EXPR:
2166 case NON_LVALUE_EXPR:
2167 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2168 break;
2169
2170 case CONSTRUCTOR:
2171 {
2172 register tree link;
2173 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
2174 if (TREE_VALUE (link) != 0)
2175 reloc |= reloc_needed (TREE_VALUE (link));
2176 }
2177 break;
2178
2179 case ERROR_MARK:
2180 break;
2181
2182 default:
2183 break;
2184 }
2185 return reloc;
2186 }
2187
2188 /* Does operand (which is a symbolic_operand) live in text space?
2189 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2190 will be true. */
2191
2192 int
2193 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2194 {
2195 if (GET_CODE (operand) == CONST)
2196 operand = XEXP (XEXP (operand, 0), 0);
2197 if (flag_pic)
2198 {
2199 if (GET_CODE (operand) == SYMBOL_REF)
2200 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2201 }
2202 else
2203 {
2204 if (GET_CODE (operand) == SYMBOL_REF)
2205 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2206 }
2207 return 1;
2208 }
2209
2210 \f
2211 /* Return the best assembler insn template
2212 for moving operands[1] into operands[0] as a fullword. */
2213 const char *
2214 singlemove_string (rtx *operands)
2215 {
2216 HOST_WIDE_INT intval;
2217
2218 if (GET_CODE (operands[0]) == MEM)
2219 return "stw %r1,%0";
2220 if (GET_CODE (operands[1]) == MEM)
2221 return "ldw %1,%0";
2222 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2223 {
2224 long i;
2225 REAL_VALUE_TYPE d;
2226
2227 if (GET_MODE (operands[1]) != SFmode)
2228 abort ();
2229
2230 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2231 bit pattern. */
2232 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2233 REAL_VALUE_TO_TARGET_SINGLE (d, i);
2234
2235 operands[1] = GEN_INT (i);
2236 /* Fall through to CONST_INT case. */
2237 }
2238 if (GET_CODE (operands[1]) == CONST_INT)
2239 {
2240 intval = INTVAL (operands[1]);
2241
2242 if (VAL_14_BITS_P (intval))
2243 return "ldi %1,%0";
2244 else if ((intval & 0x7ff) == 0)
2245 return "ldil L'%1,%0";
2246 else if (zdepi_cint_p (intval))
2247 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2248 else
2249 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2250 }
2251 return "copy %1,%0";
2252 }
2253 \f
2254
2255 /* Compute position (in OP[1]) and width (in OP[2])
2256 useful for copying IMM to a register using the zdepi
2257 instructions. Store the immediate value to insert in OP[0]. */
2258 static void
2259 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2260 {
2261 int lsb, len;
2262
2263 /* Find the least significant set bit in IMM. */
2264 for (lsb = 0; lsb < 32; lsb++)
2265 {
2266 if ((imm & 1) != 0)
2267 break;
2268 imm >>= 1;
2269 }
2270
2271 /* Choose variants based on *sign* of the 5-bit field. */
2272 if ((imm & 0x10) == 0)
2273 len = (lsb <= 28) ? 4 : 32 - lsb;
2274 else
2275 {
2276 /* Find the width of the bitstring in IMM. */
2277 for (len = 5; len < 32; len++)
2278 {
2279 if ((imm & (1 << len)) == 0)
2280 break;
2281 }
2282
2283 /* Sign extend IMM as a 5-bit value. */
2284 imm = (imm & 0xf) - 0x10;
2285 }
2286
2287 op[0] = imm;
2288 op[1] = 31 - lsb;
2289 op[2] = len;
2290 }
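/* A hedged usage sketch (illustrative, not part of the port): for
   IMM = 0x1e0 the loop above finds lsb = 5 and shifts IMM down to
   0xf; the 5-bit sign bit (0x10) is clear, so len = 4.  The result
   is op[0] = 0xf, op[1] = 31 - 5 = 26, op[2] = 4, corresponding to
   "zdepi 15,26,4,%t", which rebuilds 0x1e0 in %t.  */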
2291
2292 /* Compute position (in OP[1]) and width (in OP[2])
2293 useful for copying IMM to a register using the depdi,z
2294 instructions. Store the immediate value to insert in OP[0]. */
2295 void
2296 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2297 {
2298 HOST_WIDE_INT lsb, len;
2299
2300 /* Find the least significant set bit in IMM. */
2301 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2302 {
2303 if ((imm & 1) != 0)
2304 break;
2305 imm >>= 1;
2306 }
2307
2308 /* Choose variants based on *sign* of the 5-bit field. */
2309 if ((imm & 0x10) == 0)
2310 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2311 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2312 else
2313 {
2314 /* Find the width of the bitstring in IMM. */
2315 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2316 {
2317 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2318 break;
2319 }
2320
2321 /* Sign extend IMM as a 5-bit value. */
2322 imm = (imm & 0xf) - 0x10;
2323 }
2324
2325 op[0] = imm;
2326 op[1] = 63 - lsb;
2327 op[2] = len;
2328 }
2329
2330 /* Output assembler code to perform a doubleword move insn
2331 with operands OPERANDS. */
2332
2333 const char *
2334 output_move_double (rtx *operands)
2335 {
2336 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2337 rtx latehalf[2];
2338 rtx addreg0 = 0, addreg1 = 0;
2339
2340 /* First classify both operands. */
2341
2342 if (REG_P (operands[0]))
2343 optype0 = REGOP;
2344 else if (offsettable_memref_p (operands[0]))
2345 optype0 = OFFSOP;
2346 else if (GET_CODE (operands[0]) == MEM)
2347 optype0 = MEMOP;
2348 else
2349 optype0 = RNDOP;
2350
2351 if (REG_P (operands[1]))
2352 optype1 = REGOP;
2353 else if (CONSTANT_P (operands[1]))
2354 optype1 = CNSTOP;
2355 else if (offsettable_memref_p (operands[1]))
2356 optype1 = OFFSOP;
2357 else if (GET_CODE (operands[1]) == MEM)
2358 optype1 = MEMOP;
2359 else
2360 optype1 = RNDOP;
2361
2362 /* Check for the cases that the operand constraints are not
2363 supposed to allow. Abort if we get one,
2364 because generating code for these cases is painful. */
2365
2366 if (optype0 != REGOP && optype1 != REGOP)
2367 abort ();
2368
2369 /* Handle auto decrementing and incrementing loads and stores
2370 specifically, since the structure of the function doesn't work
2371 for them without major modification. Do this better once we teach
2372 this port about the general inc/dec addressing of the PA.
2373 (This was written by tege. Chide him if it doesn't work.) */
2374
2375 if (optype0 == MEMOP)
2376 {
2377 /* We have to output the address syntax ourselves, since print_operand
2378 doesn't deal with the addresses we want to use. Fix this later. */
2379
2380 rtx addr = XEXP (operands[0], 0);
2381 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2382 {
2383 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2384
2385 operands[0] = XEXP (addr, 0);
2386 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2387 abort ();
2388
2389 if (!reg_overlap_mentioned_p (high_reg, addr))
2390 {
2391 /* No overlap between high target register and address
2392 register. (We do this in a non-obvious way to
2393 save a register file writeback) */
2394 if (GET_CODE (addr) == POST_INC)
2395 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2396 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2397 }
2398 else
2399 abort ();
2400 }
2401 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2402 {
2403 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2404
2405 operands[0] = XEXP (addr, 0);
2406 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2407 abort ();
2408
2409 if (!reg_overlap_mentioned_p (high_reg, addr))
2410 {
2411 /* No overlap between high target register and address
2412 register. (We do this in a non-obvious way to
2413 save a register file writeback) */
2414 if (GET_CODE (addr) == PRE_INC)
2415 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2416 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2417 }
2418 else
2419 abort ();
2420 }
2421 }
2422 if (optype1 == MEMOP)
2423 {
2424 /* We have to output the address syntax ourselves, since print_operand
2425 doesn't deal with the addresses we want to use. Fix this later. */
2426
2427 rtx addr = XEXP (operands[1], 0);
2428 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2429 {
2430 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2431
2432 operands[1] = XEXP (addr, 0);
2433 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2434 abort ();
2435
2436 if (!reg_overlap_mentioned_p (high_reg, addr))
2437 {
2438 /* No overlap between high target register and address
2439 register. (We do this in a non-obvious way to
2440 save a register file writeback) */
2441 if (GET_CODE (addr) == POST_INC)
2442 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2443 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2444 }
2445 else
2446 {
2447 /* This is an undefined situation. We should load into the
2448 address register *and* update that register. Probably
2449 we don't need to handle this at all. */
2450 if (GET_CODE (addr) == POST_INC)
2451 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2452 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2453 }
2454 }
2455 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2456 {
2457 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2458
2459 operands[1] = XEXP (addr, 0);
2460 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2461 abort ();
2462
2463 if (!reg_overlap_mentioned_p (high_reg, addr))
2464 {
2465 /* No overlap between high target register and address
2466 register. (We do this in a non-obvious way to
2467 save a register file writeback) */
2468 if (GET_CODE (addr) == PRE_INC)
2469 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2470 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2471 }
2472 else
2473 {
2474 /* This is an undefined situation. We should load into the
2475 address register *and* update that register. Probably
2476 we don't need to handle this at all. */
2477 if (GET_CODE (addr) == PRE_INC)
2478 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2479 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2480 }
2481 }
2482 else if (GET_CODE (addr) == PLUS
2483 && GET_CODE (XEXP (addr, 0)) == MULT)
2484 {
2485 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2486
2487 if (!reg_overlap_mentioned_p (high_reg, addr))
2488 {
2489 rtx xoperands[4];
2490
2491 xoperands[0] = high_reg;
2492 xoperands[1] = XEXP (addr, 1);
2493 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2494 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2495 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2496 xoperands);
2497 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2498 }
2499 else
2500 {
2501 rtx xoperands[4];
2502
2503 xoperands[0] = high_reg;
2504 xoperands[1] = XEXP (addr, 1);
2505 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2506 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2507 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2508 xoperands);
2509 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2510 }
2511 }
2512 }
2513
2514 /* If an operand is an unoffsettable memory ref, find a register
2515 we can increment temporarily to make it refer to the second word. */
2516
2517 if (optype0 == MEMOP)
2518 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2519
2520 if (optype1 == MEMOP)
2521 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2522
2523 /* Ok, we can do one word at a time.
2524 Normally we do the low-numbered word first.
2525
2526 In either case, set up in LATEHALF the operands to use
2527 for the high-numbered word and in some cases alter the
2528 operands in OPERANDS to be suitable for the low-numbered word. */
2529
2530 if (optype0 == REGOP)
2531 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2532 else if (optype0 == OFFSOP)
2533 latehalf[0] = adjust_address (operands[0], SImode, 4);
2534 else
2535 latehalf[0] = operands[0];
2536
2537 if (optype1 == REGOP)
2538 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2539 else if (optype1 == OFFSOP)
2540 latehalf[1] = adjust_address (operands[1], SImode, 4);
2541 else if (optype1 == CNSTOP)
2542 split_double (operands[1], &operands[1], &latehalf[1]);
2543 else
2544 latehalf[1] = operands[1];
2545
2546 /* If the first move would clobber the source of the second one,
2547 do them in the other order.
2548
2549 This can happen in two cases:
2550
2551 mem -> register where the first half of the destination register
2552 is the same register used in the memory's address. Reload
2553 can create such insns.
2554
2555 mem in this case will be either register indirect or register
2556 indirect plus a valid offset.
2557
2558 register -> register move where REGNO(dst) == REGNO(src + 1)
2559 someone (Tim/Tege?) claimed this can happen for parameter loads.
2560
2561 Handle mem -> register case first. */
2562 if (optype0 == REGOP
2563 && (optype1 == MEMOP || optype1 == OFFSOP)
2564 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2565 operands[1], 0))
2566 {
2567 /* Do the late half first. */
2568 if (addreg1)
2569 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2570 output_asm_insn (singlemove_string (latehalf), latehalf);
2571
2572 /* Then clobber. */
2573 if (addreg1)
2574 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2575 return singlemove_string (operands);
2576 }
2577
2578 /* Now handle register -> register case. */
2579 if (optype0 == REGOP && optype1 == REGOP
2580 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2581 {
2582 output_asm_insn (singlemove_string (latehalf), latehalf);
2583 return singlemove_string (operands);
2584 }
2585
2586 /* Normal case: do the two words, low-numbered first. */
2587
2588 output_asm_insn (singlemove_string (operands), operands);
2589
2590 /* Make any unoffsettable addresses point at high-numbered word. */
2591 if (addreg0)
2592 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2593 if (addreg1)
2594 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2595
2596 /* Do that word. */
2597 output_asm_insn (singlemove_string (latehalf), latehalf);
2598
2599 /* Undo the adds we just did. */
2600 if (addreg0)
2601 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2602 if (addreg1)
2603 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2604
2605 return "";
2606 }
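/* An illustrative overlap case (not from the source): copying the
   register pair (%r4,%r5) to (%r5,%r6) has REGNO (dst) == REGNO (src) + 1,
   so the late half (%r6 <- %r5) must be moved first; doing the
   low-numbered word first would clobber %r5 before it is read.  */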
2607 \f
2608 const char *
2609 output_fp_move_double (rtx *operands)
2610 {
2611 if (FP_REG_P (operands[0]))
2612 {
2613 if (FP_REG_P (operands[1])
2614 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2615 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2616 else
2617 output_asm_insn ("fldd%F1 %1,%0", operands);
2618 }
2619 else if (FP_REG_P (operands[1]))
2620 {
2621 output_asm_insn ("fstd%F0 %1,%0", operands);
2622 }
2623 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2624 {
2625 if (GET_CODE (operands[0]) == REG)
2626 {
2627 rtx xoperands[2];
2628 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2629 xoperands[0] = operands[0];
2630 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2631 }
2632 /* This is a pain. You have to be prepared to deal with an
2633 arbitrary address here including pre/post increment/decrement.
2634
2635 So avoid this in the MD. */
2636 else
2637 abort ();
2638 }
2639 else abort ();
2640 return "";
2641 }
2642 \f
2643 /* Return a REG that occurs in ADDR with coefficient 1.
2644 ADDR can be effectively incremented by incrementing REG. */
2645
2646 static rtx
2647 find_addr_reg (rtx addr)
2648 {
2649 while (GET_CODE (addr) == PLUS)
2650 {
2651 if (GET_CODE (XEXP (addr, 0)) == REG)
2652 addr = XEXP (addr, 0);
2653 else if (GET_CODE (XEXP (addr, 1)) == REG)
2654 addr = XEXP (addr, 1);
2655 else if (CONSTANT_P (XEXP (addr, 0)))
2656 addr = XEXP (addr, 1);
2657 else if (CONSTANT_P (XEXP (addr, 1)))
2658 addr = XEXP (addr, 0);
2659 else
2660 abort ();
2661 }
2662 if (GET_CODE (addr) == REG)
2663 return addr;
2664 abort ();
2665 }
2666
2667 /* Emit code to perform a block move.
2668
2669 OPERANDS[0] is the destination pointer as a REG, clobbered.
2670 OPERANDS[1] is the source pointer as a REG, clobbered.
2671 OPERANDS[2] is a register for temporary storage.
2672 OPERANDS[3] is a register for temporary storage.
2673 OPERANDS[4] is the size as a CONST_INT
2674 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2675 OPERANDS[6] is another temporary register. */
2676
2677 const char *
2678 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2679 {
2680 int align = INTVAL (operands[5]);
2681 unsigned long n_bytes = INTVAL (operands[4]);
2682
2683 /* We can't move more than a word at a time because the PA
2684 has no move insns for integers longer than a word. (Could use fp mem ops?) */
2685 if (align > (TARGET_64BIT ? 8 : 4))
2686 align = (TARGET_64BIT ? 8 : 4);
2687
2688 /* Note that we know each loop below will execute at least twice
2689 (else we would have open-coded the copy). */
2690 switch (align)
2691 {
2692 case 8:
2693 /* Pre-adjust the loop counter. */
2694 operands[4] = GEN_INT (n_bytes - 16);
2695 output_asm_insn ("ldi %4,%2", operands);
2696
2697 /* Copying loop. */
2698 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2699 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2700 output_asm_insn ("std,ma %3,8(%0)", operands);
2701 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2702 output_asm_insn ("std,ma %6,8(%0)", operands);
2703
2704 /* Handle the residual. There could be up to 15 bytes of
2705 residual to copy! */
2706 if (n_bytes % 16 != 0)
2707 {
2708 operands[4] = GEN_INT (n_bytes % 8);
2709 if (n_bytes % 16 >= 8)
2710 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2711 if (n_bytes % 8 != 0)
2712 output_asm_insn ("ldd 0(%1),%6", operands);
2713 if (n_bytes % 16 >= 8)
2714 output_asm_insn ("std,ma %3,8(%0)", operands);
2715 if (n_bytes % 8 != 0)
2716 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2717 }
2718 return "";
2719
2720 case 4:
2721 /* Pre-adjust the loop counter. */
2722 operands[4] = GEN_INT (n_bytes - 8);
2723 output_asm_insn ("ldi %4,%2", operands);
2724
2725 /* Copying loop. */
2726 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2727 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2728 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2729 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2730 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2731
2732 /* Handle the residual. There could be up to 7 bytes of
2733 residual to copy! */
2734 if (n_bytes % 8 != 0)
2735 {
2736 operands[4] = GEN_INT (n_bytes % 4);
2737 if (n_bytes % 8 >= 4)
2738 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2739 if (n_bytes % 4 != 0)
2740 output_asm_insn ("ldw 0(%1),%6", operands);
2741 if (n_bytes % 8 >= 4)
2742 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2743 if (n_bytes % 4 != 0)
2744 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2745 }
2746 return "";
2747
2748 case 2:
2749 /* Pre-adjust the loop counter. */
2750 operands[4] = GEN_INT (n_bytes - 4);
2751 output_asm_insn ("ldi %4,%2", operands);
2752
2753 /* Copying loop. */
2754 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2755 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2756 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2757 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2758 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2759
2760 /* Handle the residual. */
2761 if (n_bytes % 4 != 0)
2762 {
2763 if (n_bytes % 4 >= 2)
2764 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2765 if (n_bytes % 2 != 0)
2766 output_asm_insn ("ldb 0(%1),%6", operands);
2767 if (n_bytes % 4 >= 2)
2768 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2769 if (n_bytes % 2 != 0)
2770 output_asm_insn ("stb %6,0(%0)", operands);
2771 }
2772 return "";
2773
2774 case 1:
2775 /* Pre-adjust the loop counter. */
2776 operands[4] = GEN_INT (n_bytes - 2);
2777 output_asm_insn ("ldi %4,%2", operands);
2778
2779 /* Copying loop. */
2780 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2781 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2782 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2783 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2784 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2785
2786 /* Handle the residual. */
2787 if (n_bytes % 2 != 0)
2788 {
2789 output_asm_insn ("ldb 0(%1),%3", operands);
2790 output_asm_insn ("stb %3,0(%0)", operands);
2791 }
2792 return "";
2793
2794 default:
2795 abort ();
2796 }
2797 }
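/* A worked residual example for the align == 4 case (illustrative
   only): with n_bytes = 19 the loop copies 16 bytes; the residual
   path then sees n_bytes % 8 == 3, skips the word copy (3 < 4),
   loads the last word with "ldw 0(%1),%6" and stores the final
   3 bytes with "{stbys|stby},e %6,3(%0)".  */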
2798
2799 /* Count the number of insns necessary to handle this block move.
2800
2801 Basic structure is the same as output_block_move, except that we
2802 count insns rather than emit them. */
2803
2804 static int
2805 compute_movmem_length (rtx insn)
2806 {
2807 rtx pat = PATTERN (insn);
2808 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2809 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2810 unsigned int n_insns = 0;
2811
2812 /* We can't move more than a word at a time because the PA
2813 has no move insns for integers longer than a word. (Could use fp mem ops?) */
2814 if (align > (TARGET_64BIT ? 8 : 4))
2815 align = (TARGET_64BIT ? 8 : 4);
2816
2817 /* The basic copying loop. */
2818 n_insns = 6;
2819
2820 /* Residuals. */
2821 if (n_bytes % (2 * align) != 0)
2822 {
2823 if ((n_bytes % (2 * align)) >= align)
2824 n_insns += 2;
2825
2826 if ((n_bytes % align) != 0)
2827 n_insns += 2;
2828 }
2829
2830 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2831 return n_insns * 4;
2832 }
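/* A worked check (illustrative): for align = 4 and n_bytes = 19 the
   residual 19 % 8 == 3 is smaller than the alignment, so only the
   final load/store pair is added: (6 + 2) insns * 4 = 32 bytes,
   matching the sequence output_block_move emits for the same
   operands.  */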
2833
2834 /* Emit code to perform a block clear.
2835
2836 OPERANDS[0] is the destination pointer as a REG, clobbered.
2837 OPERANDS[1] is a register for temporary storage.
2838 OPERANDS[2] is the size as a CONST_INT
2839 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2840
2841 const char *
2842 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2843 {
2844 int align = INTVAL (operands[3]);
2845 unsigned long n_bytes = INTVAL (operands[2]);
2846
2847 /* We can't clear more than a word at a time because the PA
2848 has no move insns for integers longer than a word. */
2849 if (align > (TARGET_64BIT ? 8 : 4))
2850 align = (TARGET_64BIT ? 8 : 4);
2851
2852 /* Note that we know each loop below will execute at least twice
2853 (else we would have open-coded the clear). */
2854 switch (align)
2855 {
2856 case 8:
2857 /* Pre-adjust the loop counter. */
2858 operands[2] = GEN_INT (n_bytes - 16);
2859 output_asm_insn ("ldi %2,%1", operands);
2860
2861 /* Loop. */
2862 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2863 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2864 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2865
2866 /* Handle the residual. There could be up to 15 bytes of
2867 residual to clear! */
2868 if (n_bytes % 16 != 0)
2869 {
2870 operands[2] = GEN_INT (n_bytes % 8);
2871 if (n_bytes % 16 >= 8)
2872 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2873 if (n_bytes % 8 != 0)
2874 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2875 }
2876 return "";
2877
2878 case 4:
2879 /* Pre-adjust the loop counter. */
2880 operands[2] = GEN_INT (n_bytes - 8);
2881 output_asm_insn ("ldi %2,%1", operands);
2882
2883 /* Loop. */
2884 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2885 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2886 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2887
2888 /* Handle the residual. There could be up to 7 bytes of
2889 residual to clear! */
2890 if (n_bytes % 8 != 0)
2891 {
2892 operands[2] = GEN_INT (n_bytes % 4);
2893 if (n_bytes % 8 >= 4)
2894 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2895 if (n_bytes % 4 != 0)
2896 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2897 }
2898 return "";
2899
2900 case 2:
2901 /* Pre-adjust the loop counter. */
2902 operands[2] = GEN_INT (n_bytes - 4);
2903 output_asm_insn ("ldi %2,%1", operands);
2904
2905 /* Loop. */
2906 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2907 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2908 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2909
2910 /* Handle the residual. */
2911 if (n_bytes % 4 != 0)
2912 {
2913 if (n_bytes % 4 >= 2)
2914 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2915 if (n_bytes % 2 != 0)
2916 output_asm_insn ("stb %%r0,0(%0)", operands);
2917 }
2918 return "";
2919
2920 case 1:
2921 /* Pre-adjust the loop counter. */
2922 operands[2] = GEN_INT (n_bytes - 2);
2923 output_asm_insn ("ldi %2,%1", operands);
2924
2925 /* Loop. */
2926 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2927 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2928 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2929
2930 /* Handle the residual. */
2931 if (n_bytes % 2 != 0)
2932 output_asm_insn ("stb %%r0,0(%0)", operands);
2933
2934 return "";
2935
2936 default:
2937 abort ();
2938 }
2939 }
2940
2941 /* Count the number of insns necessary to handle this block clear.
2942
2943 Basic structure is the same as output_block_clear, except that we
2944 count insns rather than emit them. */
2945
2946 static int
2947 compute_clrmem_length (rtx insn)
2948 {
2949 rtx pat = PATTERN (insn);
2950 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2951 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2952 unsigned int n_insns = 0;
2953
2954 /* We can't clear more than a word at a time because the PA
2955 has no move insns for integers longer than a word. */
2956 if (align > (TARGET_64BIT ? 8 : 4))
2957 align = (TARGET_64BIT ? 8 : 4);
2958
2959 /* The basic loop. */
2960 n_insns = 4;
2961
2962 /* Residuals. */
2963 if (n_bytes % (2 * align) != 0)
2964 {
2965 if ((n_bytes % (2 * align)) >= align)
2966 n_insns++;
2967
2968 if ((n_bytes % align) != 0)
2969 n_insns++;
2970 }
2971
2972 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2973 return n_insns * 4;
2974 }
2975 \f
2976
2977 const char *
2978 output_and (rtx *operands)
2979 {
2980 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2981 {
2982 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2983 int ls0, ls1, ms0, p, len;
2984
2985 for (ls0 = 0; ls0 < 32; ls0++)
2986 if ((mask & (1 << ls0)) == 0)
2987 break;
2988
2989 for (ls1 = ls0; ls1 < 32; ls1++)
2990 if ((mask & (1 << ls1)) != 0)
2991 break;
2992
2993 for (ms0 = ls1; ms0 < 32; ms0++)
2994 if ((mask & (1 << ms0)) == 0)
2995 break;
2996
2997 if (ms0 != 32)
2998 abort ();
2999
3000 if (ls1 == 32)
3001 {
3002 len = ls0;
3003
3004 if (len == 0)
3005 abort ();
3006
3007 operands[2] = GEN_INT (len);
3008 return "{extru|extrw,u} %1,31,%2,%0";
3009 }
3010 else
3011 {
3012 /* We could use this `depi' for the case above as well, but `depi'
3013 requires one more register file access than an `extru'. */
3014
3015 p = 31 - ls0;
3016 len = ls1 - ls0;
3017
3018 operands[2] = GEN_INT (p);
3019 operands[3] = GEN_INT (len);
3020 return "{depi|depwi} 0,%2,%3,%0";
3021 }
3022 }
3023 else
3024 return "and %1,%2,%0";
3025 }
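/* Illustrative examples (not from the source): mask = 0x7fffffff has
   ls0 = 31 and ls1 = 32, so the low 31 bits are kept with
   "extru %1,31,31,%0".  mask = 0xfffff00f has ls0 = 4, ls1 = 12 and
   ms0 = 32, so the zero field is cleared with "depi 0,27,8,%0"
   (p = 31 - 4, len = 12 - 4).  A mask such as 0x00ffff00 fits neither
   shape and trips the ms0 abort; the insn predicates are expected to
   keep such masks away.  */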
3026
3027 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3028 storing the result in operands[0]. */
3029 const char *
3030 output_64bit_and (rtx *operands)
3031 {
3032 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3033 {
3034 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3035 int ls0, ls1, ms0, p, len;
3036
3037 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3038 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3039 break;
3040
3041 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3042 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3043 break;
3044
3045 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3046 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3047 break;
3048
3049 if (ms0 != HOST_BITS_PER_WIDE_INT)
3050 abort ();
3051
3052 if (ls1 == HOST_BITS_PER_WIDE_INT)
3053 {
3054 len = ls0;
3055
3056 if (len == 0)
3057 abort ();
3058
3059 operands[2] = GEN_INT (len);
3060 return "extrd,u %1,63,%2,%0";
3061 }
3062 else
3063 {
3064 /* We could use this `depdi' for the case above as well, but `depdi'
3065 requires one more register file access than an `extrd,u'. */
3066
3067 p = 63 - ls0;
3068 len = ls1 - ls0;
3069
3070 operands[2] = GEN_INT (p);
3071 operands[3] = GEN_INT (len);
3072 return "depdi 0,%2,%3,%0";
3073 }
3074 }
3075 else
3076 return "and %1,%2,%0";
3077 }
3078
3079 const char *
3080 output_ior (rtx *operands)
3081 {
3082 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3083 int bs0, bs1, p, len;
3084
3085 if (INTVAL (operands[2]) == 0)
3086 return "copy %1,%0";
3087
3088 for (bs0 = 0; bs0 < 32; bs0++)
3089 if ((mask & (1 << bs0)) != 0)
3090 break;
3091
3092 for (bs1 = bs0; bs1 < 32; bs1++)
3093 if ((mask & (1 << bs1)) == 0)
3094 break;
3095
3096 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
3097 abort ();
3098
3099 p = 31 - bs0;
3100 len = bs1 - bs0;
3101
3102 operands[2] = GEN_INT (p);
3103 operands[3] = GEN_INT (len);
3104 return "{depi|depwi} -1,%2,%3,%0";
3105 }
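/* An illustrative example: mask = 0x00000ff0 has bs0 = 4 and
   bs1 = 12, a single contiguous run of ones, so we emit
   "depi -1,27,8,%0" (p = 31 - 4, len = 12 - 4) to set bits 4
   through 11.  */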
3106
3107 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3108 storing the result in operands[0]. */
3109 const char *
3110 output_64bit_ior (rtx *operands)
3111 {
3112 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3113 int bs0, bs1, p, len;
3114
3115 if (INTVAL (operands[2]) == 0)
3116 return "copy %1,%0";
3117
3118 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3119 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3120 break;
3121
3122 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3123 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3124 break;
3125
3126 if (bs1 != HOST_BITS_PER_WIDE_INT
3127 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
3128 abort ();
3129
3130 p = 63 - bs0;
3131 len = bs1 - bs0;
3132
3133 operands[2] = GEN_INT (p);
3134 operands[3] = GEN_INT (len);
3135 return "depdi -1,%2,%3,%0";
3136 }
3137 \f
3138 /* Target hook for assembling integer objects. This code handles
3139 aligned SI and DI integers specially, since function references must
3140 be preceded by P%. */
3141
3142 static bool
3143 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3144 {
3145 if (size == UNITS_PER_WORD && aligned_p
3146 && function_label_operand (x, VOIDmode))
3147 {
3148 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3149 output_addr_const (asm_out_file, x);
3150 fputc ('\n', asm_out_file);
3151 return true;
3152 }
3153 return default_assemble_integer (x, size, aligned_p);
3154 }
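/* For example (illustrative), an aligned word-size reference to a
   function foo is emitted as ".word P%foo" (".dword P%foo" when the
   word size is 8) so that the linker resolves it to a plabel rather
   than a plain code address.  */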
3155 \f
3156 /* Output an ascii string. */
3157 void
3158 output_ascii (FILE *file, const char *p, int size)
3159 {
3160 int i;
3161 int chars_output;
3162 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3163
3164 /* The HP assembler can only take strings of 256 characters at one
3165 time. This is a limitation on input line length, *not* the
3166 length of the string. Sigh. Even worse, it seems that the
3167 restriction is in number of input characters (see \xnn &
3168 \whatever). So we have to do this very carefully. */
3169
3170 fputs ("\t.STRING \"", file);
3171
3172 chars_output = 0;
3173 for (i = 0; i < size; i += 4)
3174 {
3175 int co = 0;
3176 int io = 0;
3177 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3178 {
3179 register unsigned int c = (unsigned char) p[i + io];
3180
3181 if (c == '\"' || c == '\\')
3182 partial_output[co++] = '\\';
3183 if (c >= ' ' && c < 0177)
3184 partial_output[co++] = c;
3185 else
3186 {
3187 unsigned int hexd;
3188 partial_output[co++] = '\\';
3189 partial_output[co++] = 'x';
3190 hexd = c / 16 - 0 + '0';
3191 if (hexd > '9')
3192 hexd -= '9' - 'a' + 1;
3193 partial_output[co++] = hexd;
3194 hexd = c % 16 - 0 + '0';
3195 if (hexd > '9')
3196 hexd -= '9' - 'a' + 1;
3197 partial_output[co++] = hexd;
3198 }
3199 }
3200 if (chars_output + co > 243)
3201 {
3202 fputs ("\"\n\t.STRING \"", file);
3203 chars_output = 0;
3204 }
3205 fwrite (partial_output, 1, (size_t) co, file);
3206 chars_output += co;
3207 co = 0;
3208 }
3209 fputs ("\"\n", file);
3210 }
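/* An illustrative example: the input bytes 'a', '"', 0x0a are emitted
   as .STRING "a\"\x0a" -- the quote gets a backslash escape and the
   unprintable newline becomes the \x escape built above.  */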
3211
3212 /* Try to rewrite floating point comparisons & branches to avoid
3213 useless add,tr insns.
3214
3215 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3216 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3217 first attempt to remove useless add,tr insns. It is zero
3218 for the second pass as reorg sometimes leaves bogus REG_DEAD
3219 notes lying around.
3220
3221 When CHECK_NOTES is zero we can only eliminate add,tr insns
3222 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3223 instructions. */
3224 static void
3225 remove_useless_addtr_insns (int check_notes)
3226 {
3227 rtx insn;
3228 static int pass = 0;
3229
3230 /* This is fairly cheap, so always run it when optimizing. */
3231 if (optimize > 0)
3232 {
3233 int fcmp_count = 0;
3234 int fbranch_count = 0;
3235
3236 /* Walk all the insns in this function looking for fcmp & fbranch
3237 instructions. Keep track of how many of each we find. */
3238 for (insn = get_insns (); insn; insn = next_insn (insn))
3239 {
3240 rtx tmp;
3241
3242 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3243 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3244 continue;
3245
3246 tmp = PATTERN (insn);
3247
3248 /* It must be a set. */
3249 if (GET_CODE (tmp) != SET)
3250 continue;
3251
3252 /* If the destination is CCFP, then we've found an fcmp insn. */
3253 tmp = SET_DEST (tmp);
3254 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3255 {
3256 fcmp_count++;
3257 continue;
3258 }
3259
3260 tmp = PATTERN (insn);
3261 /* If this is an fbranch instruction, bump the fbranch counter. */
3262 if (GET_CODE (tmp) == SET
3263 && SET_DEST (tmp) == pc_rtx
3264 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3265 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3266 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3267 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3268 {
3269 fbranch_count++;
3270 continue;
3271 }
3272 }
3273
3274
3275 /* Find all floating point compare + branch insns. If possible,
3276 reverse the comparison & the branch to avoid add,tr insns. */
3277 for (insn = get_insns (); insn; insn = next_insn (insn))
3278 {
3279 rtx tmp, next;
3280
3281 /* Ignore anything that isn't an INSN. */
3282 if (GET_CODE (insn) != INSN)
3283 continue;
3284
3285 tmp = PATTERN (insn);
3286
3287 /* It must be a set. */
3288 if (GET_CODE (tmp) != SET)
3289 continue;
3290
3291 /* The destination must be CCFP, which is register zero. */
3292 tmp = SET_DEST (tmp);
3293 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3294 continue;
3295
3296 /* INSN should be a set of CCFP.
3297
3298 See if the result of this insn is used in a reversed FP
3299 conditional branch. If so, reverse our condition and
3300 the branch. Doing so avoids useless add,tr insns. */
3301 next = next_insn (insn);
3302 while (next)
3303 {
3304 /* Jumps, calls and labels stop our search. */
3305 if (GET_CODE (next) == JUMP_INSN
3306 || GET_CODE (next) == CALL_INSN
3307 || GET_CODE (next) == CODE_LABEL)
3308 break;
3309
3310 /* As does another fcmp insn. */
3311 if (GET_CODE (next) == INSN
3312 && GET_CODE (PATTERN (next)) == SET
3313 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3314 && REGNO (SET_DEST (PATTERN (next))) == 0)
3315 break;
3316
3317 next = next_insn (next);
3318 }
3319
3320 /* Is NEXT a branch? */
3321 if (next
3322 && GET_CODE (next) == JUMP_INSN)
3323 {
3324 rtx pattern = PATTERN (next);
3325
3326 /* If it is a reversed fp conditional branch (e.g., uses add,tr)
3327 and CCFP dies, then reverse our conditional and the branch
3328 to avoid the add,tr. */
3329 if (GET_CODE (pattern) == SET
3330 && SET_DEST (pattern) == pc_rtx
3331 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3332 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3333 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3334 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3335 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3336 && (fcmp_count == fbranch_count
3337 || (check_notes
3338 && find_regno_note (next, REG_DEAD, 0))))
3339 {
3340 /* Reverse the branch. */
3341 tmp = XEXP (SET_SRC (pattern), 1);
3342 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3343 XEXP (SET_SRC (pattern), 2) = tmp;
3344 INSN_CODE (next) = -1;
3345
3346 /* Reverse our condition. */
3347 tmp = PATTERN (insn);
3348 PUT_CODE (XEXP (tmp, 1),
3349 (reverse_condition_maybe_unordered
3350 (GET_CODE (XEXP (tmp, 1)))));
3351 }
3352 }
3353 }
3354 }
3355
3356 pass = !pass;
3357
3358 }
3359 \f
3360 /* You may have trouble believing this, but this is the 32 bit HP-PA
3361 stack layout. Wow.
3362
3363 Offset Contents
3364
3365 Variable arguments (optional; any number may be allocated)
3366
3367 SP-(4*(N+9)) arg word N
3368 : :
3369 SP-56 arg word 5
3370 SP-52 arg word 4
3371
3372 Fixed arguments (must be allocated; may remain unused)
3373
3374 SP-48 arg word 3
3375 SP-44 arg word 2
3376 SP-40 arg word 1
3377 SP-36 arg word 0
3378
3379 Frame Marker
3380
3381 SP-32 External Data Pointer (DP)
3382 SP-28 External sr4
3383 SP-24 External/stub RP (RP')
3384 SP-20 Current RP
3385 SP-16 Static Link
3386 SP-12 Clean up
3387 SP-8 Calling Stub RP (RP'')
3388 SP-4 Previous SP
3389
3390 Top of Frame
3391
3392 SP-0 Stack Pointer (points to next available address)
3393
3394 */
3395
3396 /* This function saves registers as follows. Registers marked with ' are
3397 this function's registers (as opposed to the previous function's).
3398 If a frame_pointer isn't needed, r4 is saved as a general register;
3399 the space for the frame pointer is still allocated, though, to keep
3400 things simple.
3401
3402
3403 Top of Frame
3404
3405 SP (FP') Previous FP
3406 SP + 4 Alignment filler (sigh)
3407 SP + 8 Space for locals reserved here.
3408 .
3409 .
3410 .
3411 SP + n All call saved registers used.
3412 .
3413 .
3414 .
3415 SP + o All call saved fp registers used.
3416 .
3417 .
3418 .
3419 SP + p (SP') points to next available address.
3420
3421 */
3422
3423 /* Global variables set by output_function_prologue(). */
3424 /* Size of frame. Need to know this to emit return insns from
3425 leaf procedures. */
3426 static HOST_WIDE_INT actual_fsize, local_fsize;
3427 static int save_fregs;
3428
3429 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3430 Handle case where DISP > 8k by using the add_high_const patterns.
3431
3432 Note in DISP > 8k case, we will leave the high part of the address
3433 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */
3434
3435 static void
3436 store_reg (int reg, HOST_WIDE_INT disp, int base)
3437 {
3438 rtx insn, dest, src, basereg;
3439
3440 src = gen_rtx_REG (word_mode, reg);
3441 basereg = gen_rtx_REG (Pmode, base);
3442 if (VAL_14_BITS_P (disp))
3443 {
3444 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3445 insn = emit_move_insn (dest, src);
3446 }
3447 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3448 {
3449 rtx delta = GEN_INT (disp);
3450 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3451
3452 emit_move_insn (tmpreg, delta);
3453 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3454 dest = gen_rtx_MEM (word_mode, tmpreg);
3455 insn = emit_move_insn (dest, src);
3456 if (DO_FRAME_NOTES)
3457 {
3458 REG_NOTES (insn)
3459 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3460 gen_rtx_SET (VOIDmode,
3461 gen_rtx_MEM (word_mode,
3462 gen_rtx_PLUS (word_mode, basereg,
3463 delta)),
3464 src),
3465 REG_NOTES (insn));
3466 }
3467 }
3468 else
3469 {
3470 rtx delta = GEN_INT (disp);
3471 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3472 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3473
3474 emit_move_insn (tmpreg, high);
3475 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3476 insn = emit_move_insn (dest, src);
3477 if (DO_FRAME_NOTES)
3478 {
3479 REG_NOTES (insn)
3480 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3481 gen_rtx_SET (VOIDmode,
3482 gen_rtx_MEM (word_mode,
3483 gen_rtx_PLUS (word_mode, basereg,
3484 delta)),
3485 src),
3486 REG_NOTES (insn));
3487 }
3488 }
3489
3490 if (DO_FRAME_NOTES)
3491 RTX_FRAME_RELATED_P (insn) = 1;
3492 }
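/* A worked example (illustrative): disp = 20480 fails VAL_14_BITS_P,
   so the final path above emits the equivalent of
   "addil L'20480,%base", leaving the high part in %r1, followed by
   "stw %src,R'20480(%r1)".  */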
3493
3494 /* Emit RTL to store REG at the memory location specified by BASE and then
3495 add MOD to BASE. MOD must be <= 8k. */
3496
3497 static void
3498 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3499 {
3500 rtx insn, basereg, srcreg, delta;
3501
3502 if (!VAL_14_BITS_P (mod))
3503 abort ();
3504
3505 basereg = gen_rtx_REG (Pmode, base);
3506 srcreg = gen_rtx_REG (word_mode, reg);
3507 delta = GEN_INT (mod);
3508
3509 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3510 if (DO_FRAME_NOTES)
3511 {
3512 RTX_FRAME_RELATED_P (insn) = 1;
3513
3514 /* RTX_FRAME_RELATED_P must be set on each frame related set
3515 in a parallel with more than one element. Don't set
3516 RTX_FRAME_RELATED_P in the first set if reg is temporary
3517 register 1. The effect of this operation is recorded in
3518 the initial copy. */
3519 if (reg != 1)
3520 {
3521 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3522 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3523 }
3524 else
3525 {
3526 /* The first element of a PARALLEL is always processed if it is
3527 a SET. Thus, we need an expression list for this case. */
3528 REG_NOTES (insn)
3529 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3530 gen_rtx_SET (VOIDmode, basereg,
3531 gen_rtx_PLUS (word_mode, basereg, delta)),
3532 REG_NOTES (insn));
3533 }
3534 }
3535 }
3536
3537 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3538 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3539 whether to add a frame note or not.
3540
3541 In the DISP > 8k case, we leave the high part of the address in %r1.
3542 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3543
3544 static void
3545 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3546 {
3547 rtx insn;
3548
3549 if (VAL_14_BITS_P (disp))
3550 {
3551 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3552 plus_constant (gen_rtx_REG (Pmode, base), disp));
3553 }
3554 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3555 {
3556 rtx basereg = gen_rtx_REG (Pmode, base);
3557 rtx delta = GEN_INT (disp);
3558 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3559
3560 emit_move_insn (tmpreg, delta);
3561 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3562 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3563 }
3564 else
3565 {
3566 rtx basereg = gen_rtx_REG (Pmode, base);
3567 rtx delta = GEN_INT (disp);
3568 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3569
3570 emit_move_insn (tmpreg,
3571 gen_rtx_PLUS (Pmode, basereg,
3572 gen_rtx_HIGH (Pmode, delta)));
3573 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3574 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3575 }
3576
3577 if (DO_FRAME_NOTES && note)
3578 RTX_FRAME_RELATED_P (insn) = 1;
3579 }
3580
3581 HOST_WIDE_INT
3582 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3583 {
3584 int freg_saved = 0;
3585 int i, j;
3586
3587 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3588 be consistent with the rounding and size calculation done here.
3589 Change them at the same time. */
3590
3591 /* We do our own stack alignment. First, round the size of the
3592 stack locals up to a word boundary. */
3593 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3594
3595 /* Space for previous frame pointer + filler. If any frame is
3596 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3597 waste some space here for the sake of HP compatibility. The
3598 first slot is only used when the frame pointer is needed. */
3599 if (size || frame_pointer_needed)
3600 size += STARTING_FRAME_OFFSET;
3601
3602 /* If the current function calls __builtin_eh_return, then we need
3603 to allocate stack space for registers that will hold data for
3604 the exception handler. */
3605 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3606 {
3607 unsigned int i;
3608
3609 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3610 continue;
3611 size += i * UNITS_PER_WORD;
3612 }
3613
3614 /* Account for space used by the callee general register saves. */
3615 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3616 if (regs_ever_live[i])
3617 size += UNITS_PER_WORD;
3618
3619 /* Account for space used by the callee floating point register saves. */
3620 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3621 if (regs_ever_live[i]
3622 || (!TARGET_64BIT && regs_ever_live[i + 1]))
3623 {
3624 freg_saved = 1;
3625
3626 /* We always save both halves of the FP register, so always
3627 increment the frame size by 8 bytes. */
3628 size += 8;
3629 }
3630
3631 /* If any of the floating registers are saved, account for the
3632 alignment needed for the floating point register save block. */
3633 if (freg_saved)
3634 {
3635 size = (size + 7) & ~7;
3636 if (fregs_live)
3637 *fregs_live = 1;
3638 }
3639
3640 /* The various ABIs include space for the outgoing parameters in the
3641 size of the current function's stack frame. We don't need to align
3642 for the outgoing arguments as their alignment is set by the final
3643 rounding for the frame as a whole. */
3644 size += current_function_outgoing_args_size;
3645
3646 /* Allocate space for the fixed frame marker. This space must be
3647 allocated for any function that makes calls or allocates
3648 stack space. */
3649 if (!current_function_is_leaf || size)
3650 size += TARGET_64BIT ? 48 : 32;
3651
3652 /* Finally, round to the preferred stack boundary. */
3653 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3654 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3655 }
3656
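/* Editorial sketch (not part of the port): the rounding idiom used
   throughout compute_frame_size above.  For a power-of-two ALIGN,
   adding ALIGN - 1 and masking the low bits rounds SIZE up to the
   next multiple of ALIGN.  The helper name is hypothetical.  */
static long
hppa_round_up (long size, long align)
{
  return (size + align - 1) & ~(align - 1);
}

/* E.g. hppa_round_up (size, UNITS_PER_WORD) is the word rounding of
   the locals, hppa_round_up (size, 8) is the doubleword rounding of
   the FP save block, and the final statement rounds to
   PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT bytes.  */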
3657 /* Generate the assembly code for function entry. FILE is a stdio
3658 stream to output the code to. SIZE is an int: how many units of
3659 temporary storage to allocate.
3660
3661 Refer to the array `regs_ever_live' to determine which registers to
3662 save; `regs_ever_live[I]' is nonzero if register number I is ever
3663 used in the function. This function is responsible for knowing
3664 which registers should not be saved even if used. */
3665
3666 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3667 of memory. If any fpu reg is used in the function, we allocate
3668 such a block here, at the bottom of the frame, just in case it's needed.
3669
3670 If this function is a leaf procedure, then we may choose not
3671 to do a "save" insn. The decision about whether or not
3672 to do this is made in regclass.c. */
3673
3674 static void
3675 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3676 {
3677 /* The function's label and associated .PROC must never be
3678 separated and must be output *after* any profiling declarations
3679 to avoid changing spaces/subspaces within a procedure. */
3680 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3681 fputs ("\t.PROC\n", file);
3682
3683 /* hppa_expand_prologue does the dirty work now. We just need
3684 to output the assembler directives which denote the start
3685 of a function. */
3686 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3687 if (regs_ever_live[2])
3688 fputs (",CALLS,SAVE_RP", file);
3689 else
3690 fputs (",NO_CALLS", file);
3691
3692 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3693 at the beginning of the frame and that it is used as the frame
3694 pointer for the frame. We do this because our current frame
3695 layout doesn't conform to that specified in the HP runtime
3696 documentation and we need a way to indicate to programs such as
3697 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3698 isn't used by HP compilers but is supported by the assembler.
3699 However, SAVE_SP is supposed to indicate that the previous stack
3700 pointer has been saved in the frame marker. */
3701 if (frame_pointer_needed)
3702 fputs (",SAVE_SP", file);
3703
3704 /* Pass on information about the number of callee register saves
3705 performed in the prologue.
3706
3707 The compiler is supposed to pass the highest register number
3708 saved, the assembler then has to adjust that number before
3709 entering it into the unwind descriptor (to account for any
3710 caller saved registers with lower register numbers than the
3711 first callee saved register). */
3712 if (gr_saved)
3713 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3714
3715 if (fr_saved)
3716 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3717
3718 fputs ("\n\t.ENTRY\n", file);
3719
3720 remove_useless_addtr_insns (0);
3721 }
3722
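/* Editorial sketch (not part of the port): the arithmetic behind the
   ENTRY_GR and ENTRY_FR values emitted above.  The unwind format wants
   the highest register number saved; since the port saves callee GRs
   starting at %r3 and callee FRs starting at %fr12, it passes
   gr_saved + 2 and fr_saved + 11, implicitly assuming the saves are
   contiguous from the first callee-saved register.  Helper names are
   hypothetical.  */
static int
hppa_entry_gr_value (int n_gr_saved)
{
  return n_gr_saved + 2;   /* two saves (%r3, %r4) => ENTRY_GR=4 */
}

static int
hppa_entry_fr_value (int n_fr_saved)
{
  return n_fr_saved + 11;  /* one save (%fr12) => ENTRY_FR=12 */
}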
3723 void
3724 hppa_expand_prologue (void)
3725 {
3726 int merge_sp_adjust_with_store = 0;
3727 HOST_WIDE_INT size = get_frame_size ();
3728 HOST_WIDE_INT offset;
3729 int i;
3730 rtx insn, tmpreg;
3731
3732 gr_saved = 0;
3733 fr_saved = 0;
3734 save_fregs = 0;
3735
3736 /* Compute total size for frame pointer, filler, locals and rounding to
3737 the next word boundary. Similar code appears in compute_frame_size
3738 and must be changed in tandem with this code. */
3739 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3740 if (local_fsize || frame_pointer_needed)
3741 local_fsize += STARTING_FRAME_OFFSET;
3742
3743 actual_fsize = compute_frame_size (size, &save_fregs);
3744
3745 /* Compute a few things we will use often. */
3746 tmpreg = gen_rtx_REG (word_mode, 1);
3747
3748 /* Save RP first. The calling conventions manual states RP will
3749 always be stored into the caller's frame at sp - 20 or sp - 16
3750 depending on which ABI is in use. */
3751 if (regs_ever_live[2] || current_function_calls_eh_return)
3752 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3753
3754 /* Allocate the local frame and set up the frame pointer if needed. */
3755 if (actual_fsize != 0)
3756 {
3757 if (frame_pointer_needed)
3758 {
3759 /* Copy the old frame pointer temporarily into %r1. Set up the
3760 new stack pointer, then store away the saved old frame pointer
3761 into the stack at sp and at the same time update the stack
3762 pointer by actual_fsize bytes. There are two versions: the
3763 first handles small (<8k) frames, the second handles large
3764 (>=8k) frames. */
3765 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3766 if (DO_FRAME_NOTES)
3767 {
3768 /* We need to record the frame pointer save here since the
3769 new frame pointer is set in the following insn. */
3770 RTX_FRAME_RELATED_P (insn) = 1;
3771 REG_NOTES (insn)
3772 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3773 gen_rtx_SET (VOIDmode,
3774 gen_rtx_MEM (word_mode, stack_pointer_rtx),
3775 frame_pointer_rtx),
3776 REG_NOTES (insn));
3777 }
3778
3779 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3780 if (DO_FRAME_NOTES)
3781 RTX_FRAME_RELATED_P (insn) = 1;
3782
3783 if (VAL_14_BITS_P (actual_fsize))
3784 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3785 else
3786 {
3787 /* It is incorrect to store the saved frame pointer at *sp,
3788 then increment sp (writes beyond the current stack boundary).
3789
3790 So instead use stwm to store at *sp and post-increment the
3791 stack pointer as an atomic operation. Then increment sp to
3792 finish allocating the new frame. */
3793 HOST_WIDE_INT adjust1 = 8192 - 64;
3794 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3795
3796 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3797 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3798 adjust2, 1);
3799 }
3800
3801 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3802 we need to store the previous stack pointer (frame pointer)
3803 into the frame marker on targets that use the HP unwind
3804 library. This allows the HP unwind library to be used to
3805 unwind GCC frames. However, we are not fully compatible
3806 with the HP library because our frame layout differs from
3807 that specified in the HP runtime specification.
3808
3809 We don't want a frame note on this instruction as the frame
3810 marker moves during dynamic stack allocation.
3811
3812 This instruction also serves as a blockage to prevent
3813 register spills from being scheduled before the stack
3814 pointer is raised. This is necessary as we store
3815 registers using the frame pointer as a base register,
3816 and the frame pointer is set before sp is raised. */
3817 if (TARGET_HPUX_UNWIND_LIBRARY)
3818 {
3819 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3820 GEN_INT (TARGET_64BIT ? -8 : -4));
3821
3822 emit_move_insn (gen_rtx_MEM (word_mode, addr),
3823 frame_pointer_rtx);
3824 }
3825 else
3826 emit_insn (gen_blockage ());
3827 }
3828 /* No frame pointer needed. */
3829 else
3830 {
3831 /* In some cases we can perform the first callee register save
3832 and allocating the stack frame at the same time. If so, just
3833 make a note of it and defer allocating the frame until saving
3834 the callee registers. */
3835 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3836 merge_sp_adjust_with_store = 1;
3837 /* Cannot optimize. Adjust the stack frame by actual_fsize
3838 bytes. */
3839 else
3840 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3841 actual_fsize, 1);
3842 }
3843 }
3844
3845 /* Normal register save.
3846
3847 Do not save the frame pointer in the frame_pointer_needed case. It
3848 was done earlier. */
3849 if (frame_pointer_needed)
3850 {
3851 offset = local_fsize;
3852
3853 /* Saving the EH return data registers in the frame is the simplest
3854 way to get the frame unwind information emitted. We put them
3855 just before the general registers. */
3856 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3857 {
3858 unsigned int i, regno;
3859
3860 for (i = 0; ; ++i)
3861 {
3862 regno = EH_RETURN_DATA_REGNO (i);
3863 if (regno == INVALID_REGNUM)
3864 break;
3865
3866 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3867 offset += UNITS_PER_WORD;
3868 }
3869 }
3870
3871 for (i = 18; i >= 4; i--)
3872 if (regs_ever_live[i] && ! call_used_regs[i])
3873 {
3874 store_reg (i, offset, FRAME_POINTER_REGNUM);
3875 offset += UNITS_PER_WORD;
3876 gr_saved++;
3877 }
3878 /* Account for %r3 which is saved in a special place. */
3879 gr_saved++;
3880 }
3881 /* No frame pointer needed. */
3882 else
3883 {
3884 offset = local_fsize - actual_fsize;
3885
3886 /* Saving the EH return data registers in the frame is the simplest
3887 way to get the frame unwind information emitted. */
3888 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3889 {
3890 unsigned int i, regno;
3891
3892 for (i = 0; ; ++i)
3893 {
3894 regno = EH_RETURN_DATA_REGNO (i);
3895 if (regno == INVALID_REGNUM)
3896 break;
3897
3898 /* If merge_sp_adjust_with_store is nonzero, then we can
3899 optimize the first save. */
3900 if (merge_sp_adjust_with_store)
3901 {
3902 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3903 merge_sp_adjust_with_store = 0;
3904 }
3905 else
3906 store_reg (regno, offset, STACK_POINTER_REGNUM);
3907 offset += UNITS_PER_WORD;
3908 }
3909 }
3910
3911 for (i = 18; i >= 3; i--)
3912 if (regs_ever_live[i] && ! call_used_regs[i])
3913 {
3914 /* If merge_sp_adjust_with_store is nonzero, then we can
3915 optimize the first GR save. */
3916 if (merge_sp_adjust_with_store)
3917 {
3918 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3919 merge_sp_adjust_with_store = 0;
3920 }
3921 else
3922 store_reg (i, offset, STACK_POINTER_REGNUM);
3923 offset += UNITS_PER_WORD;
3924 gr_saved++;
3925 }
3926
3927 /* If we wanted to merge the SP adjustment with a GR save, but we never
3928 did any GR saves, then just emit the adjustment here. */
3929 if (merge_sp_adjust_with_store)
3930 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3931 actual_fsize, 1);
3932 }
3933
3934 /* The hppa calling conventions say that %r19, the pic offset
3935 register, is saved at sp - 32 (in this function's frame)
3936 when generating PIC code. FIXME: What is the correct thing
3937 to do for functions which make no calls and allocate no
3938 frame? Do we need to allocate a frame, or can we just omit
3939 the save? For now we'll just omit the save.
3940
3941 We don't want a note on this insn as the frame marker can
3942 move if there is a dynamic stack allocation. */
3943 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3944 {
3945 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3946
3947 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3948
3949 }
3950
3951 /* Align pointer properly (doubleword boundary). */
3952 offset = (offset + 7) & ~7;
3953
3954 /* Floating point register store. */
3955 if (save_fregs)
3956 {
3957 rtx base;
3958
3959 /* First get the frame or stack pointer to the start of the FP register
3960 save area. */
3961 if (frame_pointer_needed)
3962 {
3963 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3964 base = frame_pointer_rtx;
3965 }
3966 else
3967 {
3968 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3969 base = stack_pointer_rtx;
3970 }
3971
3972 /* Now actually save the FP registers. */
3973 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3974 {
3975 if (regs_ever_live[i]
3976 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3977 {
3978 rtx addr, insn, reg;
3979 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3980 reg = gen_rtx_REG (DFmode, i);
3981 insn = emit_move_insn (addr, reg);
3982 if (DO_FRAME_NOTES)
3983 {
3984 RTX_FRAME_RELATED_P (insn) = 1;
3985 if (TARGET_64BIT)
3986 {
3987 rtx mem = gen_rtx_MEM (DFmode,
3988 plus_constant (base, offset));
3989 REG_NOTES (insn)
3990 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3991 gen_rtx_SET (VOIDmode, mem, reg),
3992 REG_NOTES (insn));
3993 }
3994 else
3995 {
3996 rtx meml = gen_rtx_MEM (SFmode,
3997 plus_constant (base, offset));
3998 rtx memr = gen_rtx_MEM (SFmode,
3999 plus_constant (base, offset + 4));
4000 rtx regl = gen_rtx_REG (SFmode, i);
4001 rtx regr = gen_rtx_REG (SFmode, i + 1);
4002 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
4003 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
4004 rtvec vec;
4005
4006 RTX_FRAME_RELATED_P (setl) = 1;
4007 RTX_FRAME_RELATED_P (setr) = 1;
4008 vec = gen_rtvec (2, setl, setr);
4009 REG_NOTES (insn)
4010 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4011 gen_rtx_SEQUENCE (VOIDmode, vec),
4012 REG_NOTES (insn));
4013 }
4014 }
4015 offset += GET_MODE_SIZE (DFmode);
4016 fr_saved++;
4017 }
4018 }
4019 }
4020 }
4021
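/* Editorial sketch (not part of the port): the two-step allocation
   used above for large frames.  The stwm displacement must fit the
   signed 14-bit field (maximum 8191), so the code picks 8192 - 64,
   which fits and remains 64-byte aligned, and adds the remainder
   with a separate instruction.  The helper name is hypothetical.  */
static void
hppa_split_large_frame (long frame, long *adjust1, long *adjust2)
{
  *adjust1 = 8192 - 64;         /* stored atomically via stwm */
  *adjust2 = frame - *adjust1;  /* added by set_reg_plus_d */
}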
4022 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4023 Handle case where DISP > 8k by using the add_high_const patterns. */
4024
4025 static void
4026 load_reg (int reg, HOST_WIDE_INT disp, int base)
4027 {
4028 rtx dest = gen_rtx_REG (word_mode, reg);
4029 rtx basereg = gen_rtx_REG (Pmode, base);
4030 rtx src;
4031
4032 if (VAL_14_BITS_P (disp))
4033 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
4034 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4035 {
4036 rtx delta = GEN_INT (disp);
4037 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4038
4039 emit_move_insn (tmpreg, delta);
4040 if (TARGET_DISABLE_INDEXING)
4041 {
4042 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4043 src = gen_rtx_MEM (word_mode, tmpreg);
4044 }
4045 else
4046 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4047 }
4048 else
4049 {
4050 rtx delta = GEN_INT (disp);
4051 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4052 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4053
4054 emit_move_insn (tmpreg, high);
4055 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4056 }
4057
4058 emit_move_insn (dest, src);
4059 }
4060
4061 /* Update the total code bytes output to the text section. */
4062
4063 static void
4064 update_total_code_bytes (int nbytes)
4065 {
4066 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4067 && !IN_NAMED_SECTION_P (cfun->decl))
4068 {
4069 if (INSN_ADDRESSES_SET_P ())
4070 {
4071 unsigned long old_total = total_code_bytes;
4072
4073 total_code_bytes += nbytes;
4074
4075 /* Be prepared to handle overflows. */
4076 if (old_total > total_code_bytes)
4077 total_code_bytes = -1;
4078 }
4079 else
4080 total_code_bytes = -1;
4081 }
4082 }
4083
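/* Editorial sketch (not part of the port): the saturating update in
   update_total_code_bytes above.  total_code_bytes is unsigned, so
   wraparound shows up as the new total comparing below the old one;
   the counter is then pinned at -1 (all ones) to mean "overflowed".
   The helper name is hypothetical.  */
static unsigned long
hppa_saturating_add (unsigned long total, unsigned long nbytes)
{
  unsigned long new_total = total + nbytes;

  return new_total < total ? (unsigned long) -1 : new_total;
}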
4084 /* This function generates the assembly code for function exit.
4085 Args are as for output_function_prologue ().
4086
4087 The function epilogue should not depend on the current stack
4088 pointer! It should use the frame pointer only. This is mandatory
4089 because of alloca; we also take advantage of it to omit stack
4090 adjustments before returning. */
4091
4092 static void
4093 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4094 {
4095 rtx insn = get_last_insn ();
4096
4097 last_address = 0;
4098
4099 /* hppa_expand_epilogue does the dirty work now. We just need
4100 to output the assembler directives which denote the end
4101 of a function.
4102
4103 To make debuggers happy, emit a nop if the epilogue was completely
4104 eliminated due to a volatile call as the last insn in the
4105 current function. That way the return address (in %r2) will
4106 always point to a valid instruction in the current function. */
4107
4108 /* Get the last real insn. */
4109 if (GET_CODE (insn) == NOTE)
4110 insn = prev_real_insn (insn);
4111
4112 /* If it is a sequence, then look inside. */
4113 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4114 insn = XVECEXP (PATTERN (insn), 0, 0);
4115
4116 /* If insn is a CALL_INSN, then it must be a call to a volatile
4117 function (otherwise there would be epilogue insns). */
4118 if (insn && GET_CODE (insn) == CALL_INSN)
4119 {
4120 fputs ("\tnop\n", file);
4121 last_address += 4;
4122 }
4123
4124 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4125
4126 if (INSN_ADDRESSES_SET_P ())
4127 {
4128 insn = get_last_nonnote_insn ();
4129 last_address += INSN_ADDRESSES (INSN_UID (insn));
4130 if (INSN_P (insn))
4131 last_address += insn_default_length (insn);
4132 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4133 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4134 }
4135
4136 /* Finally, update the total number of code bytes output so far. */
4137 update_total_code_bytes (last_address);
4138 }
4139
4140 void
4141 hppa_expand_epilogue (void)
4142 {
4143 rtx tmpreg;
4144 HOST_WIDE_INT offset;
4145 HOST_WIDE_INT ret_off = 0;
4146 int i;
4147 int merge_sp_adjust_with_load = 0;
4148
4149 /* We will use this often. */
4150 tmpreg = gen_rtx_REG (word_mode, 1);
4151
4152 /* Try to restore RP early to avoid load/use interlocks when
4153 RP gets used in the return (bv) instruction. This appears to still
4154 be necessary even when we schedule the prologue and epilogue. */
4155 if (regs_ever_live [2] || current_function_calls_eh_return)
4156 {
4157 ret_off = TARGET_64BIT ? -16 : -20;
4158 if (frame_pointer_needed)
4159 {
4160 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4161 ret_off = 0;
4162 }
4163 else
4164 {
4165 /* No frame pointer, and stack is smaller than 8k. */
4166 if (VAL_14_BITS_P (ret_off - actual_fsize))
4167 {
4168 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4169 ret_off = 0;
4170 }
4171 }
4172 }
4173
4174 /* General register restores. */
4175 if (frame_pointer_needed)
4176 {
4177 offset = local_fsize;
4178
4179 /* If the current function calls __builtin_eh_return, then we need
4180 to restore the saved EH data registers. */
4181 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4182 {
4183 unsigned int i, regno;
4184
4185 for (i = 0; ; ++i)
4186 {
4187 regno = EH_RETURN_DATA_REGNO (i);
4188 if (regno == INVALID_REGNUM)
4189 break;
4190
4191 load_reg (regno, offset, FRAME_POINTER_REGNUM);
4192 offset += UNITS_PER_WORD;
4193 }
4194 }
4195
4196 for (i = 18; i >= 4; i--)
4197 if (regs_ever_live[i] && ! call_used_regs[i])
4198 {
4199 load_reg (i, offset, FRAME_POINTER_REGNUM);
4200 offset += UNITS_PER_WORD;
4201 }
4202 }
4203 else
4204 {
4205 offset = local_fsize - actual_fsize;
4206
4207 /* If the current function calls __builtin_eh_return, then we need
4208 to restore the saved EH data registers. */
4209 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4210 {
4211 unsigned int i, regno;
4212
4213 for (i = 0; ; ++i)
4214 {
4215 regno = EH_RETURN_DATA_REGNO (i);
4216 if (regno == INVALID_REGNUM)
4217 break;
4218
4219 /* Only for the first load.
4220 merge_sp_adjust_with_load holds the number of the register
4221 load with which we will merge the sp adjustment. */
4222 if (merge_sp_adjust_with_load == 0
4223 && local_fsize == 0
4224 && VAL_14_BITS_P (-actual_fsize))
4225 merge_sp_adjust_with_load = regno;
4226 else
4227 load_reg (regno, offset, STACK_POINTER_REGNUM);
4228 offset += UNITS_PER_WORD;
4229 }
4230 }
4231
4232 for (i = 18; i >= 3; i--)
4233 {
4234 if (regs_ever_live[i] && ! call_used_regs[i])
4235 {
4236 /* Only for the first load.
4237 merge_sp_adjust_with_load holds the number of the register
4238 load with which we will merge the sp adjustment. */
4239 if (merge_sp_adjust_with_load == 0
4240 && local_fsize == 0
4241 && VAL_14_BITS_P (-actual_fsize))
4242 merge_sp_adjust_with_load = i;
4243 else
4244 load_reg (i, offset, STACK_POINTER_REGNUM);
4245 offset += UNITS_PER_WORD;
4246 }
4247 }
4248 }
4249
4250 /* Align pointer properly (doubleword boundary). */
4251 offset = (offset + 7) & ~7;
4252
4253 /* FP register restores. */
4254 if (save_fregs)
4255 {
4256 /* Adjust the register to index off of. */
4257 if (frame_pointer_needed)
4258 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4259 else
4260 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4261
4262 /* Actually do the restores now. */
4263 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4264 if (regs_ever_live[i]
4265 || (! TARGET_64BIT && regs_ever_live[i + 1]))
4266 {
4267 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4268 rtx dest = gen_rtx_REG (DFmode, i);
4269 emit_move_insn (dest, src);
4270 }
4271 }
4272
4273 /* Emit a blockage insn here to keep these insns from being moved to
4274 an earlier spot in the epilogue, or into the main instruction stream.
4275
4276 This is necessary as we must not cut the stack back before all the
4277 restores are finished. */
4278 emit_insn (gen_blockage ());
4279
4280 /* Reset stack pointer (and possibly frame pointer). The stack
4281 pointer is initially set to fp + 64 to avoid a race condition. */
4282 if (frame_pointer_needed)
4283 {
4284 rtx delta = GEN_INT (-64);
4285
4286 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4287 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4288 }
4289 /* If we were deferring a callee register restore, do it now. */
4290 else if (merge_sp_adjust_with_load)
4291 {
4292 rtx delta = GEN_INT (-actual_fsize);
4293 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4294
4295 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4296 }
4297 else if (actual_fsize != 0)
4298 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4299 - actual_fsize, 0);
4300
4301 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4302 frame greater than 8k), do so now. */
4303 if (ret_off != 0)
4304 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4305
4306 if (DO_FRAME_NOTES && current_function_calls_eh_return)
4307 {
4308 rtx sa = EH_RETURN_STACKADJ_RTX;
4309
4310 emit_insn (gen_blockage ());
4311 emit_insn (TARGET_64BIT
4312 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4313 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4314 }
4315 }
4316
4317 rtx
4318 hppa_pic_save_rtx (void)
4319 {
4320 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4321 }
4322
4323 void
4324 hppa_profile_hook (int label_no)
4325 {
4326 /* We use SImode for the address of the function in both 32 and
4327 64-bit code to avoid having to provide DImode versions of the
4328 lcla2 and load_offset_label_address insn patterns. */
4329 rtx reg = gen_reg_rtx (SImode);
4330 rtx label_rtx = gen_label_rtx ();
4331 rtx begin_label_rtx, call_insn;
4332 char begin_label_name[16];
4333
4334 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4335 label_no);
4336 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4337
4338 if (TARGET_64BIT)
4339 emit_move_insn (arg_pointer_rtx,
4340 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4341 GEN_INT (64)));
4342
4343 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4344
4345 /* The address of the function is loaded into %r25 with an instruction-
4346 relative sequence that avoids the use of relocations. The sequence
4347 is split so that the load_offset_label_address instruction can
4348 occupy the delay slot of the call to _mcount. */
4349 if (TARGET_PA_20)
4350 emit_insn (gen_lcla2 (reg, label_rtx));
4351 else
4352 emit_insn (gen_lcla1 (reg, label_rtx));
4353
4354 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4355 reg, begin_label_rtx, label_rtx));
4356
4357 #ifndef NO_PROFILE_COUNTERS
4358 {
4359 rtx count_label_rtx, addr, r24;
4360 char count_label_name[16];
4361
4362 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4363 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4364
4365 addr = force_reg (Pmode, count_label_rtx);
4366 r24 = gen_rtx_REG (Pmode, 24);
4367 emit_move_insn (r24, addr);
4368
4369 call_insn =
4370 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4371 gen_rtx_SYMBOL_REF (Pmode,
4372 "_mcount")),
4373 GEN_INT (TARGET_64BIT ? 24 : 12)));
4374
4375 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4376 }
4377 #else
4378
4379 call_insn =
4380 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4381 gen_rtx_SYMBOL_REF (Pmode,
4382 "_mcount")),
4383 GEN_INT (TARGET_64BIT ? 16 : 8)));
4384
4385 #endif
4386
4387 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4388 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4389
4390 /* Indicate the _mcount call cannot throw, nor will it execute a
4391 non-local goto. */
4392 REG_NOTES (call_insn)
4393 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
4394 }
4395
4396 /* Fetch the return address for the frame COUNT steps up from
4397 the current frame, after the prologue. FRAMEADDR is the
4398 frame pointer of the COUNT frame.
4399
4400 We want to ignore any export stub remnants here. To handle this,
4401 we examine the code at the return address, and if it is an export
4402 stub, we return a memory rtx for the stub return address stored
4403 at frame-24.
4404
4405 The value returned is used in two different ways:
4406
4407 1. To find a function's caller.
4408
4409 2. To change the return address for a function.
4410
4411 This function handles most instances of case 1; however, it will
4412 fail if there are two levels of stubs to execute on the return
4413 path. The only way I believe that can happen is if the return value
4414 needs a parameter relocation, which never happens for C code.
4415
4416 This function handles most instances of case 2; however, it will
4417 fail if we did not originally have stub code on the return path
4418 but will need stub code on the new return path. This can happen if
4419 the caller & callee are both in the main program, but the new
4420 return location is in a shared library. */
4421
4422 rtx
4423 return_addr_rtx (int count, rtx frameaddr)
4424 {
4425 rtx label;
4426 rtx rp;
4427 rtx saved_rp;
4428 rtx ins;
4429
4430 if (count != 0)
4431 return NULL_RTX;
4432
4433 rp = get_hard_reg_initial_val (Pmode, 2);
4434
4435 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4436 return rp;
4437
4438 saved_rp = gen_reg_rtx (Pmode);
4439 emit_move_insn (saved_rp, rp);
4440
4441 /* Get pointer to the instruction stream. We have to mask out the
4442 privilege level from the two low order bits of the return address
4443 pointer here so that ins will point to the start of the first
4444 instruction that would have been executed if we returned. */
4445 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4446 label = gen_label_rtx ();
4447
4448 /* Check the instruction stream at the normal return address for the
4449 export stub:
4450
4451 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4452 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4453 0x00011820 | stub+16: mtsp r1,sr0
4454 0xe0400002 | stub+20: be,n 0(sr0,rp)
4455
4456 If it is an export stub, then our return address is really in
4457 -24[frameaddr]. */
4458
4459 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
4460 NULL_RTX, SImode, 1);
4461 emit_jump_insn (gen_bne (label));
4462
4463 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
4464 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
4465 emit_jump_insn (gen_bne (label));
4466
4467 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
4468 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
4469 emit_jump_insn (gen_bne (label));
4470
4471 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
4472 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
4473
4474 /* If there is no export stub then just use the value saved from
4475 the return pointer register. */
4476
4477 emit_jump_insn (gen_bne (label));
4478
4479 /* Here we know that our return address points to an export
4480 stub. We don't want to return the address of the export stub,
4481 but rather the return address of the export stub. That return
4482 address is stored at -24[frameaddr]. */
4483
4484 emit_move_insn (saved_rp,
4485 gen_rtx_MEM (Pmode,
4486 memory_address (Pmode,
4487 plus_constant (frameaddr,
4488 -24))));
4489
4490 emit_label (label);
4491 return saved_rp;
4492 }
4493
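/* Editorial sketch (not part of the port): the export-stub test that
   return_addr_rtx expands into RTL above, written as plain C.  INS
   points at the first instruction that would execute on return, with
   the privilege-level bits already masked off.  The helper name is
   hypothetical.  */
static int
hppa_is_export_stub (const unsigned int *ins)
{
  return (ins[0] == 0x4bc23fd1      /* ldw -18(sr0,sp),rp */
          && ins[1] == 0x004010a1   /* ldsid (sr0,rp),r1 */
          && ins[2] == 0x00011820   /* mtsp r1,sr0 */
          && ins[3] == 0xe0400002); /* be,n 0(sr0,rp) */
}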
4494 /* This is only valid once reload has completed because it depends on
4495 knowing exactly how much (if any) frame there is and...
4496
4497 It's only valid if there is no frame marker to de-allocate and...
4498
4499 It's only valid if %r2 hasn't been saved into the caller's frame
4500 (we're not profiling and %r2 isn't live anywhere). */
4501 int
4502 hppa_can_use_return_insn_p (void)
4503 {
4504 return (reload_completed
4505 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4506 && ! regs_ever_live[2]
4507 && ! frame_pointer_needed);
4508 }
4509
4510 void
4511 emit_bcond_fp (enum rtx_code code, rtx operand0)
4512 {
4513 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4514 gen_rtx_IF_THEN_ELSE (VOIDmode,
4515 gen_rtx_fmt_ee (code,
4516 VOIDmode,
4517 gen_rtx_REG (CCFPmode, 0),
4518 const0_rtx),
4519 gen_rtx_LABEL_REF (VOIDmode, operand0),
4520 pc_rtx)));
4521
4522 }
4523
4524 rtx
4525 gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
4526 {
4527 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4528 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
4529 }
4530
4531 /* Adjust the cost of a scheduling dependency. Return the new cost of
4532 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4533
4534 static int
4535 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4536 {
4537 enum attr_type attr_type;
4538
4539 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4540 true dependencies as they are described with bypasses now. */
4541 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4542 return cost;
4543
4544 if (! recog_memoized (insn))
4545 return 0;
4546
4547 attr_type = get_attr_type (insn);
4548
4549 if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
4550 {
4551 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4552 cycles later. */
4553
4554 if (attr_type == TYPE_FPLOAD)
4555 {
4556 rtx pat = PATTERN (insn);
4557 rtx dep_pat = PATTERN (dep_insn);
4558 if (GET_CODE (pat) == PARALLEL)
4559 {
4560 /* This happens for the fldXs,mb patterns. */
4561 pat = XVECEXP (pat, 0, 0);
4562 }
4563 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4564 /* If this happens, we have to extend this to schedule
4565 optimally. Return 0 for now. */
4566 return 0;
4567
4568 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4569 {
4570 if (! recog_memoized (dep_insn))
4571 return 0;
4572 switch (get_attr_type (dep_insn))
4573 {
4574 case TYPE_FPALU:
4575 case TYPE_FPMULSGL:
4576 case TYPE_FPMULDBL:
4577 case TYPE_FPDIVSGL:
4578 case TYPE_FPDIVDBL:
4579 case TYPE_FPSQRTSGL:
4580 case TYPE_FPSQRTDBL:
4581 /* A fpload can't be issued until one cycle before a
4582 preceding arithmetic operation has finished if
4583 the target of the fpload is any of the sources
4584 (or destination) of the arithmetic operation. */
4585 return insn_default_latency (dep_insn) - 1;
4586
4587 default:
4588 return 0;
4589 }
4590 }
4591 }
4592 else if (attr_type == TYPE_FPALU)
4593 {
4594 rtx pat = PATTERN (insn);
4595 rtx dep_pat = PATTERN (dep_insn);
4596 if (GET_CODE (pat) == PARALLEL)
4597 {
4598 /* This happens for the fldXs,mb patterns. */
4599 pat = XVECEXP (pat, 0, 0);
4600 }
4601 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4602 /* If this happens, we have to extend this to schedule
4603 optimally. Return 0 for now. */
4604 return 0;
4605
4606 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4607 {
4608 if (! recog_memoized (dep_insn))
4609 return 0;
4610 switch (get_attr_type (dep_insn))
4611 {
4612 case TYPE_FPDIVSGL:
4613 case TYPE_FPDIVDBL:
4614 case TYPE_FPSQRTSGL:
4615 case TYPE_FPSQRTDBL:
4616 /* An ALU flop can't be issued until two cycles before a
4617 preceding divide or sqrt operation has finished if
4618 the target of the ALU flop is any of the sources
4619 (or destination) of the divide or sqrt operation. */
4620 return insn_default_latency (dep_insn) - 2;
4621
4622 default:
4623 return 0;
4624 }
4625 }
4626 }
4627
4628 /* For other anti dependencies, the cost is 0. */
4629 return 0;
4630 }
4631 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4632 {
4633 /* Output dependency; DEP_INSN writes a register that INSN writes some
4634 cycles later. */
4635 if (attr_type == TYPE_FPLOAD)
4636 {
4637 rtx pat = PATTERN (insn);
4638 rtx dep_pat = PATTERN (dep_insn);
4639 if (GET_CODE (pat) == PARALLEL)
4640 {
4641 /* This happens for the fldXs,mb patterns. */
4642 pat = XVECEXP (pat, 0, 0);
4643 }
4644 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4645 /* If this happens, we have to extend this to schedule
4646 optimally. Return 0 for now. */
4647 return 0;
4648
4649 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4650 {
4651 if (! recog_memoized (dep_insn))
4652 return 0;
4653 switch (get_attr_type (dep_insn))
4654 {
4655 case TYPE_FPALU:
4656 case TYPE_FPMULSGL:
4657 case TYPE_FPMULDBL:
4658 case TYPE_FPDIVSGL:
4659 case TYPE_FPDIVDBL:
4660 case TYPE_FPSQRTSGL:
4661 case TYPE_FPSQRTDBL:
4662 /* A fpload can't be issued until one cycle before a
4663 preceding arithmetic operation has finished if
4664 the target of the fpload is the destination of the
4665 arithmetic operation.
4666
4667 Exception: For PA7100LC, PA7200 and PA7300, the cost
4668 is 3 cycles, unless they bundle together. We also
4669 pay the penalty if the second insn is a fpload. */
4670 return insn_default_latency (dep_insn) - 1;
4671
4672 default:
4673 return 0;
4674 }
4675 }
4676 }
4677 else if (attr_type == TYPE_FPALU)
4678 {
4679 rtx pat = PATTERN (insn);
4680 rtx dep_pat = PATTERN (dep_insn);
4681 if (GET_CODE (pat) == PARALLEL)
4682 {
4683 /* This happens for the fldXs,mb patterns. */
4684 pat = XVECEXP (pat, 0, 0);
4685 }
4686 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4687 /* If this happens, we have to extend this to schedule
4688 optimally. Return 0 for now. */
4689 return 0;
4690
4691 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4692 {
4693 if (! recog_memoized (dep_insn))
4694 return 0;
4695 switch (get_attr_type (dep_insn))
4696 {
4697 case TYPE_FPDIVSGL:
4698 case TYPE_FPDIVDBL:
4699 case TYPE_FPSQRTSGL:
4700 case TYPE_FPSQRTDBL:
4701 /* An ALU flop can't be issued until two cycles before a
4702 preceding divide or sqrt operation has finished if
4703 the target of the ALU flop is also the target of
4704 the divide or sqrt operation. */
4705 return insn_default_latency (dep_insn) - 2;
4706
4707 default:
4708 return 0;
4709 }
4710 }
4711 }
4712
4713 /* For other output dependencies, the cost is 0. */
4714 return 0;
4715 }
4716 else
4717 abort ();
4718 }
4719
4720 /* Adjust scheduling priorities. We use this to try and keep addil
4721 and the next use of %r1 close together. */
4722 static int
4723 pa_adjust_priority (rtx insn, int priority)
4724 {
4725 rtx set = single_set (insn);
4726 rtx src, dest;
4727 if (set)
4728 {
4729 src = SET_SRC (set);
4730 dest = SET_DEST (set);
4731 if (GET_CODE (src) == LO_SUM
4732 && symbolic_operand (XEXP (src, 1), VOIDmode)
4733 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4734 priority >>= 3;
4735
4736 else if (GET_CODE (src) == MEM
4737 && GET_CODE (XEXP (src, 0)) == LO_SUM
4738 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4739 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4740 priority >>= 1;
4741
4742 else if (GET_CODE (dest) == MEM
4743 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4744 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4745 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4746 priority >>= 3;
4747 }
4748 return priority;
4749 }
4750
4751 /* The 700 can only issue a single insn at a time.
4752 The 7XXX processors can issue two insns at a time.
4753 The 8000 can issue 4 insns at a time. */
4754 static int
4755 pa_issue_rate (void)
4756 {
4757 switch (pa_cpu)
4758 {
4759 case PROCESSOR_700: return 1;
4760 case PROCESSOR_7100: return 2;
4761 case PROCESSOR_7100LC: return 2;
4762 case PROCESSOR_7200: return 2;
4763 case PROCESSOR_7300: return 2;
4764 case PROCESSOR_8000: return 4;
4765
4766 default:
4767 abort ();
4768 }
4769 }
4770
4771
4772
4773 /* Return any length adjustment needed by INSN which already has its length
4774 computed as LENGTH. Return zero if no adjustment is necessary.
4775
4776 For the PA: function calls, millicode calls, and backwards short
4777 conditional branches with unfilled delay slots need an adjustment by +1
4778 (to account for the NOP which will be inserted into the instruction stream).
4779
4780 Also compute the length of an inline block move here as it is too
4781 complicated to express as a length attribute in pa.md. */
4782 int
4783 pa_adjust_insn_length (rtx insn, int length)
4784 {
4785 rtx pat = PATTERN (insn);
4786
4787 /* Jumps inside switch tables which have unfilled delay slots need
4788 adjustment. */
4789 if (GET_CODE (insn) == JUMP_INSN
4790 && GET_CODE (pat) == PARALLEL
4791 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4792 return 4;
4793 /* Millicode insn with an unfilled delay slot. */
4794 else if (GET_CODE (insn) == INSN
4795 && GET_CODE (pat) != SEQUENCE
4796 && GET_CODE (pat) != USE
4797 && GET_CODE (pat) != CLOBBER
4798 && get_attr_type (insn) == TYPE_MILLI)
4799 return 4;
4800 /* Block move pattern. */
4801 else if (GET_CODE (insn) == INSN
4802 && GET_CODE (pat) == PARALLEL
4803 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4804 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4805 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4806 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4807 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4808 return compute_movmem_length (insn) - 4;
4809 /* Block clear pattern. */
4810 else if (GET_CODE (insn) == INSN
4811 && GET_CODE (pat) == PARALLEL
4812 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4813 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4814 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4815 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4816 return compute_clrmem_length (insn) - 4;
4817 /* Conditional branch with an unfilled delay slot. */
4818 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4819 {
4820 /* Adjust a short backwards conditional with an unfilled delay slot. */
4821 if (GET_CODE (pat) == SET
4822 && length == 4
4823 && ! forward_branch_p (insn))
4824 return 4;
4825 else if (GET_CODE (pat) == PARALLEL
4826 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4827 && length == 4)
4828 return 4;
4829 /* Adjust dbra insn with short backwards conditional branch with
4830 unfilled delay slot -- only for case where counter is in a
4831 general register. */
4832 else if (GET_CODE (pat) == PARALLEL
4833 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4834 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4835 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4836 && length == 4
4837 && ! forward_branch_p (insn))
4838 return 4;
4839 else
4840 return 0;
4841 }
4842 return 0;
4843 }
4844
4845 /* Print operand X (an rtx) in assembler syntax to file FILE.
4846 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4847 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4848
4849 void
4850 print_operand (FILE *file, rtx x, int code)
4851 {
4852 switch (code)
4853 {
4854 case '#':
4855 /* Output a 'nop' if there's nothing for the delay slot. */
4856 if (dbr_sequence_length () == 0)
4857 fputs ("\n\tnop", file);
4858 return;
4859 case '*':
4860 /* Output a nullification completer if there's nothing for the
4861 delay slot or nullification is requested. */
4862 if (dbr_sequence_length () == 0
4863 || (final_sequence
4864 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4865 fputs (",n", file);
4866 return;
4867 case 'R':
4868 /* Print out the second register name of a register pair.
4869 I.e., R (6) => 7. */
4870 fputs (reg_names[REGNO (x) + 1], file);
4871 return;
4872 case 'r':
4873 /* A register or zero. */
4874 if (x == const0_rtx
4875 || (x == CONST0_RTX (DFmode))
4876 || (x == CONST0_RTX (SFmode)))
4877 {
4878 fputs ("%r0", file);
4879 return;
4880 }
4881 else
4882 break;
4883 case 'f':
4884 /* A register or zero (floating point). */
4885 if (x == const0_rtx
4886 || (x == CONST0_RTX (DFmode))
4887 || (x == CONST0_RTX (SFmode)))
4888 {
4889 fputs ("%fr0", file);
4890 return;
4891 }
4892 else
4893 break;
4894 case 'A':
4895 {
4896 rtx xoperands[2];
4897
4898 xoperands[0] = XEXP (XEXP (x, 0), 0);
4899 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4900 output_global_address (file, xoperands[1], 0);
4901 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4902 return;
4903 }
4904
4905 case 'C': /* Plain (C)ondition */
4906 case 'X':
4907 switch (GET_CODE (x))
4908 {
4909 case EQ:
4910 fputs ("=", file); break;
4911 case NE:
4912 fputs ("<>", file); break;
4913 case GT:
4914 fputs (">", file); break;
4915 case GE:
4916 fputs (">=", file); break;
4917 case GEU:
4918 fputs (">>=", file); break;
4919 case GTU:
4920 fputs (">>", file); break;
4921 case LT:
4922 fputs ("<", file); break;
4923 case LE:
4924 fputs ("<=", file); break;
4925 case LEU:
4926 fputs ("<<=", file); break;
4927 case LTU:
4928 fputs ("<<", file); break;
4929 default:
4930 abort ();
4931 }
4932 return;
4933 case 'N': /* Condition, (N)egated */
4934 switch (GET_CODE (x))
4935 {
4936 case EQ:
4937 fputs ("<>", file); break;
4938 case NE:
4939 fputs ("=", file); break;
4940 case GT:
4941 fputs ("<=", file); break;
4942 case GE:
4943 fputs ("<", file); break;
4944 case GEU:
4945 fputs ("<<", file); break;
4946 case GTU:
4947 fputs ("<<=", file); break;
4948 case LT:
4949 fputs (">=", file); break;
4950 case LE:
4951 fputs (">", file); break;
4952 case LEU:
4953 fputs (">>", file); break;
4954 case LTU:
4955 fputs (">>=", file); break;
4956 default:
4957 abort ();
4958 }
4959 return;
4960 /* For floating point comparisons. Note that the output
4961 predicates are the complement of the desired mode. */
4962 case 'Y':
4963 switch (GET_CODE (x))
4964 {
4965 case EQ:
4966 fputs ("!=", file); break;
4967 case NE:
4968 fputs ("=", file); break;
4969 case GT:
4970 fputs ("!>", file); break;
4971 case GE:
4972 fputs ("!>=", file); break;
4973 case LT:
4974 fputs ("!<", file); break;
4975 case LE:
4976 fputs ("!<=", file); break;
4977 case LTGT:
4978 fputs ("!<>", file); break;
4979 case UNLE:
4980 fputs (">", file); break;
4981 case UNLT:
4982 fputs (">=", file); break;
4983 case UNGE:
4984 fputs ("<", file); break;
4985 case UNGT:
4986 fputs ("<=", file); break;
4987 case UNEQ:
4988 fputs ("<>", file); break;
4989 case UNORDERED:
4990 fputs ("<=>", file); break;
4991 case ORDERED:
4992 fputs ("!<=>", file); break;
4993 default:
4994 abort ();
4995 }
4996 return;
4997 case 'S': /* Condition, operands are (S)wapped. */
4998 switch (GET_CODE (x))
4999 {
5000 case EQ:
5001 fputs ("=", file); break;
5002 case NE:
5003 fputs ("<>", file); break;
5004 case GT:
5005 fputs ("<", file); break;
5006 case GE:
5007 fputs ("<=", file); break;
5008 case GEU:
5009 fputs ("<<=", file); break;
5010 case GTU:
5011 fputs ("<<", file); break;
5012 case LT:
5013 fputs (">", file); break;
5014 case LE:
5015 fputs (">=", file); break;
5016 case LEU:
5017 fputs (">>=", file); break;
5018 case LTU:
5019 fputs (">>", file); break;
5020 default:
5021 abort ();
5022 }
5023 return;
5024 case 'B': /* Condition, (B)oth swapped and negate. */
5025 switch (GET_CODE (x))
5026 {
5027 case EQ:
5028 fputs ("<>", file); break;
5029 case NE:
5030 fputs ("=", file); break;
5031 case GT:
5032 fputs (">=", file); break;
5033 case GE:
5034 fputs (">", file); break;
5035 case GEU:
5036 fputs (">>", file); break;
5037 case GTU:
5038 fputs (">>=", file); break;
5039 case LT:
5040 fputs ("<=", file); break;
5041 case LE:
5042 fputs ("<", file); break;
5043 case LEU:
5044 fputs ("<<", file); break;
5045 case LTU:
5046 fputs ("<<=", file); break;
5047 default:
5048 abort ();
5049 }
5050 return;
5051 case 'k':
5052 if (GET_CODE (x) == CONST_INT)
5053 {
5054 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5055 return;
5056 }
5057 abort ();
5058 case 'Q':
5059 if (GET_CODE (x) == CONST_INT)
5060 {
5061 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5062 return;
5063 }
5064 abort ();
5065 case 'L':
5066 if (GET_CODE (x) == CONST_INT)
5067 {
5068 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5069 return;
5070 }
5071 abort ();
5072 case 'O':
5073 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
5074 {
5075 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5076 return;
5077 }
5078 abort ();
5079 case 'p':
5080 if (GET_CODE (x) == CONST_INT)
5081 {
5082 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5083 return;
5084 }
5085 abort ();
5086 case 'P':
5087 if (GET_CODE (x) == CONST_INT)
5088 {
5089 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5090 return;
5091 }
5092 abort ();
5093 case 'I':
5094 if (GET_CODE (x) == CONST_INT)
5095 fputs ("i", file);
5096 return;
5097 case 'M':
5098 case 'F':
5099 switch (GET_CODE (XEXP (x, 0)))
5100 {
5101 case PRE_DEC:
5102 case PRE_INC:
5103 if (ASSEMBLER_DIALECT == 0)
5104 fputs ("s,mb", file);
5105 else
5106 fputs (",mb", file);
5107 break;
5108 case POST_DEC:
5109 case POST_INC:
5110 if (ASSEMBLER_DIALECT == 0)
5111 fputs ("s,ma", file);
5112 else
5113 fputs (",ma", file);
5114 break;
5115 case PLUS:
5116 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5117 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5118 {
5119 if (ASSEMBLER_DIALECT == 0)
5120 fputs ("x", file);
5121 }
5122 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5123 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5124 {
5125 if (ASSEMBLER_DIALECT == 0)
5126 fputs ("x,s", file);
5127 else
5128 fputs (",s", file);
5129 }
5130 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5131 fputs ("s", file);
5132 break;
5133 default:
5134 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5135 fputs ("s", file);
5136 break;
5137 }
5138 return;
5139 case 'G':
5140 output_global_address (file, x, 0);
5141 return;
5142 case 'H':
5143 output_global_address (file, x, 1);
5144 return;
5145 case 0: /* Don't do anything special */
5146 break;
5147 case 'Z':
5148 {
5149 unsigned op[3];
5150 compute_zdepwi_operands (INTVAL (x), op);
5151 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5152 return;
5153 }
5154 case 'z':
5155 {
5156 unsigned op[3];
5157 compute_zdepdi_operands (INTVAL (x), op);
5158 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5159 return;
5160 }
5161 case 'c':
5162 /* We can get here from a .vtable_inherit due to our
5163 CONSTANT_ADDRESS_P rejecting perfectly good constant
5164 addresses. */
5165 break;
5166 default:
5167 abort ();
5168 }
5169 if (GET_CODE (x) == REG)
5170 {
5171 fputs (reg_names [REGNO (x)], file);
5172 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5173 {
5174 fputs ("R", file);
5175 return;
5176 }
5177 if (FP_REG_P (x)
5178 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5179 && (REGNO (x) & 1) == 0)
5180 fputs ("L", file);
5181 }
5182 else if (GET_CODE (x) == MEM)
5183 {
5184 int size = GET_MODE_SIZE (GET_MODE (x));
5185 rtx base = NULL_RTX;
5186 switch (GET_CODE (XEXP (x, 0)))
5187 {
5188 case PRE_DEC:
5189 case POST_DEC:
5190 base = XEXP (XEXP (x, 0), 0);
5191 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5192 break;
5193 case PRE_INC:
5194 case POST_INC:
5195 base = XEXP (XEXP (x, 0), 0);
5196 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5197 break;
5198 case PLUS:
5199 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5200 fprintf (file, "%s(%s)",
5201 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5202 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5203 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5204 fprintf (file, "%s(%s)",
5205 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5206 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5207 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5208 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5209 {
5210 /* Because the REG_POINTER flag can get lost during reload,
5211 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5212 index and base registers in the combined move patterns. */
5213 rtx base = XEXP (XEXP (x, 0), 1);
5214 rtx index = XEXP (XEXP (x, 0), 0);
5215
5216 fprintf (file, "%s(%s)",
5217 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5218 }
5219 else
5220 output_address (XEXP (x, 0));
5221 break;
5222 default:
5223 output_address (XEXP (x, 0));
5224 break;
5225 }
5226 }
5227 else
5228 output_addr_const (file, x);
5229 }
5230
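/* Editorial sketch (not part of the port): the integer rewrites behind
   the 'Q', 'L', 'p' and 'P' codes in print_operand above, which turn a
   shift count into the complementary length or bit position expected
   by the deposit/extract instructions.  Helper names are
   hypothetical.  */
static long hppa_print_Q (long x) { return 64 - (x & 63); }  /* %Q */
static long hppa_print_L (long x) { return 32 - (x & 31); }  /* %L */
static long hppa_print_p (long x) { return 63 - (x & 63); }  /* %p */
static long hppa_print_P (long x) { return 31 - (x & 31); }  /* %P */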
5231 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5232
5233 void
5234 output_global_address (FILE *file, rtx x, int round_constant)
5235 {
5236
5237 /* Imagine (high (const (plus ...))). */
5238 if (GET_CODE (x) == HIGH)
5239 x = XEXP (x, 0);
5240
5241 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5242 assemble_name (file, XSTR (x, 0));
5243 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5244 {
5245 assemble_name (file, XSTR (x, 0));
5246 fputs ("-$global$", file);
5247 }
5248 else if (GET_CODE (x) == CONST)
5249 {
5250 const char *sep = "";
5251 int offset = 0; /* assembler wants -$global$ at end */
5252 rtx base = NULL_RTX;
5253
5254 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5255 {
5256 base = XEXP (XEXP (x, 0), 0);
5257 output_addr_const (file, base);
5258 }
5259 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
5260 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5261 else abort ();
5262
5263 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
5264 {
5265 base = XEXP (XEXP (x, 0), 1);
5266 output_addr_const (file, base);
5267 }
5268 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
5269 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5270 else abort ();
5271
5272 /* How bogus. The compiler is apparently responsible for
5273 rounding the constant if it uses an LR field selector.
5274
5275 The linker and/or assembler seem a better place since
5276 they have to do this kind of thing already.
5277
5278 If we fail to do this, HP's optimizing linker may eliminate
5279 an addil, but not update the ldw/stw/ldo instruction that
5280 uses the result of the addil. */
5281 if (round_constant)
5282 offset = ((offset + 0x1000) & ~0x1fff);
5283
5284 if (GET_CODE (XEXP (x, 0)) == PLUS)
5285 {
5286 if (offset < 0)
5287 {
5288 offset = -offset;
5289 sep = "-";
5290 }
5291 else
5292 sep = "+";
5293 }
5294 else if (GET_CODE (XEXP (x, 0)) == MINUS
5295 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5296 sep = "-";
5297 else abort ();
5298
5299 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5300 fputs ("-$global$", file);
5301 if (offset)
5302 fprintf (file, "%s%d", sep, offset);
5303 }
5304 else
5305 output_addr_const (file, x);
5306 }
5307
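/* Editorial sketch (not part of the port): the LR-selector rounding
   applied above when round_constant is set.  Adding 0x1000 and
   masking the low 13 bits rounds the offset to the nearest multiple
   of 0x2000, so the addil and the dependent ldw/stw/ldo agree on the
   same left part.  The helper name is hypothetical.  */
static int
hppa_round_lr_offset (int offset)
{
  return (offset + 0x1000) & ~0x1fff;
}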
5308 /* Output boilerplate text to appear at the beginning of the file.
5309 There are several possible versions. */
5310 #define aputs(x) fputs(x, asm_out_file)
5311 static inline void
5312 pa_file_start_level (void)
5313 {
5314 if (TARGET_64BIT)
5315 aputs ("\t.LEVEL 2.0w\n");
5316 else if (TARGET_PA_20)
5317 aputs ("\t.LEVEL 2.0\n");
5318 else if (TARGET_PA_11)
5319 aputs ("\t.LEVEL 1.1\n");
5320 else
5321 aputs ("\t.LEVEL 1.0\n");
5322 }
5323
5324 static inline void
5325 pa_file_start_space (int sortspace)
5326 {
5327 aputs ("\t.SPACE $PRIVATE$");
5328 if (sortspace)
5329 aputs (",SORT=16");
5330 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5331 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5332 "\n\t.SPACE $TEXT$");
5333 if (sortspace)
5334 aputs (",SORT=8");
5335 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5336 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5337 }
5338
5339 static inline void
5340 pa_file_start_file (int want_version)
5341 {
5342 if (write_symbols != NO_DEBUG)
5343 {
5344 output_file_directive (asm_out_file, main_input_filename);
5345 if (want_version)
5346 aputs ("\t.version\t\"01.01\"\n");
5347 }
5348 }
5349
5350 static inline void
5351 pa_file_start_mcount (const char *aswhat)
5352 {
5353 if (profile_flag)
5354 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5355 }
5356
5357 static void
5358 pa_elf_file_start (void)
5359 {
5360 pa_file_start_level ();
5361 pa_file_start_mcount ("ENTRY");
5362 pa_file_start_file (0);
5363 }
5364
5365 static void
5366 pa_som_file_start (void)
5367 {
5368 pa_file_start_level ();
5369 pa_file_start_space (0);
5370 aputs ("\t.IMPORT $global$,DATA\n"
5371 "\t.IMPORT $$dyncall,MILLICODE\n");
5372 pa_file_start_mcount ("CODE");
5373 pa_file_start_file (0);
5374 }
5375
5376 static void
5377 pa_linux_file_start (void)
5378 {
5379 pa_file_start_file (1);
5380 pa_file_start_level ();
5381 pa_file_start_mcount ("CODE");
5382 }
5383
5384 static void
5385 pa_hpux64_gas_file_start (void)
5386 {
5387 pa_file_start_level ();
5388 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5389 if (profile_flag)
5390 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5391 #endif
5392 pa_file_start_file (1);
5393 }
5394
5395 static void
5396 pa_hpux64_hpas_file_start (void)
5397 {
5398 pa_file_start_level ();
5399 pa_file_start_space (1);
5400 pa_file_start_mcount ("CODE");
5401 pa_file_start_file (0);
5402 }
5403 #undef aputs
5404
5405 static struct deferred_plabel *
5406 get_plabel (const char *fname)
5407 {
5408 size_t i;
5409
5410 /* See if we have already put this function on the list of deferred
5411 plabels. This list is generally small, so a linear search is not
5412 too ugly. If it proves too slow, replace it with something faster. */
5413 for (i = 0; i < n_deferred_plabels; i++)
5414 if (strcmp (fname, deferred_plabels[i].name) == 0)
5415 break;
5416
5417 /* If the deferred plabel list is empty, or this entry was not found
5418 on the list, create a new entry on the list. */
5419 if (deferred_plabels == NULL || i == n_deferred_plabels)
5420 {
5421 const char *real_name;
5422
5423 if (deferred_plabels == 0)
5424 deferred_plabels = (struct deferred_plabel *)
5425 ggc_alloc (sizeof (struct deferred_plabel));
5426 else
5427 deferred_plabels = (struct deferred_plabel *)
5428 ggc_realloc (deferred_plabels,
5429 ((n_deferred_plabels + 1)
5430 * sizeof (struct deferred_plabel)));
5431
5432 i = n_deferred_plabels++;
5433 deferred_plabels[i].internal_label = gen_label_rtx ();
5434 deferred_plabels[i].name = ggc_strdup (fname);
5435
5436 /* Gross. We have just implicitly taken the address of this function,
5437 mark it as such. */
5438 real_name = (*targetm.strip_name_encoding) (fname);
5439 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
5440 }
5441
5442 return &deferred_plabels[i];
5443 }
5444
5445 static void
5446 output_deferred_plabels (void)
5447 {
5448 size_t i;
5449 /* If we have deferred plabels, then we need to switch into the data
5450 section and align it to a 4 byte boundary before we output the
5451 deferred plabels. */
5452 if (n_deferred_plabels)
5453 {
5454 data_section ();
5455 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5456 }
5457
5458 /* Now output the deferred plabels. */
5459 for (i = 0; i < n_deferred_plabels; i++)
5460 {
5461 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5462 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5463 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
5464 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5465 }
5466 }
5467
5468 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5469 /* Initialize optabs to point to HPUX long double emulation routines. */
5470 static void
5471 pa_hpux_init_libfuncs (void)
5472 {
5473 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5474 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5475 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5476 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5477 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5478 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5479 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5480 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5481 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5482
5483 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5484 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5485 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5486 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5487 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5488 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5489 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5490
5491 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5492 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5493 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5494 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5495
5496 set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
5497 ? "__U_Qfcnvfxt_quad_to_sgl"
5498 : "_U_Qfcnvfxt_quad_to_sgl");
5499 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5500 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5501 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5502
5503 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5504 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5505 }
5506 #endif
5507
5508 /* HP's millicode routines mean something special to the assembler.
5509 Keep track of which ones we have used. */
5510
5511 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5512 static void import_milli (enum millicodes);
5513 static char imported[(int) end1000];
5514 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5515 static const char import_string[] = ".IMPORT $$....,MILLICODE";
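/* MILLI_START is the offset of the "...." placeholder in import_string;
import_milli copies the four-character millicode name over it. */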
5516 #define MILLI_START 10
5517
5518 static void
5519 import_milli (enum millicodes code)
5520 {
5521 char str[sizeof (import_string)];
5522
5523 if (!imported[(int) code])
5524 {
5525 imported[(int) code] = 1;
5526 strcpy (str, import_string);
5527 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5528 output_asm_insn (str, 0);
5529 }
5530 }
5531
5532 /* The register constraints have put the operands and return value in
5533 the proper registers. */
5534
5535 const char *
5536 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5537 {
5538 import_milli (mulI);
5539 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5540 }
5541
5542 /* Emit the rtl for doing a division by a constant. */
5543
5544 /* Do magic division millicodes exist for this value? */
5545 static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
5546 1, 1};
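/* The nonzero entries above correspond to divisors 3, 5, 6, 7, 9, 10,
12, 14 and 15. */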
5547
5548 /* We'll use an array to keep track of the magic millicodes and
5549 whether or not we've used them already. [n][0] is signed, [n][1] is
5550 unsigned. */
5551
5552 static int div_milli[16][2];
5553
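/* Predicate for the divisor operand of a millicode division: either
general register %r25 (where the millicode routines expect the
divisor) or a positive constant less than 16 for which a magic
millicode routine exists. */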
5554 int
5555 div_operand (rtx op, enum machine_mode mode)
5556 {
5557 return (mode == SImode
5558 && ((GET_CODE (op) == REG && REGNO (op) == 25)
5559 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
5560 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
5561 }
5562
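/* Emit RTL for a division using one of the magic millicode routines.
The dividend is moved into %r26 and the quotient appears in %r29;
the PARALLEL records the registers clobbered by the call, including
the return pointer (%r2 for the 64-bit runtime, %r31 otherwise).
Return 1 if a millicode sequence was emitted, 0 otherwise. */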
5563 int
5564 emit_hpdiv_const (rtx *operands, int unsignedp)
5565 {
5566 if (GET_CODE (operands[2]) == CONST_INT
5567 && INTVAL (operands[2]) > 0
5568 && INTVAL (operands[2]) < 16
5569 && magic_milli[INTVAL (operands[2])])
5570 {
5571 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5572
5573 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5574 emit
5575 (gen_rtx_PARALLEL
5576 (VOIDmode,
5577 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5578 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5579 SImode,
5580 gen_rtx_REG (SImode, 26),
5581 operands[2])),
5582 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5583 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5584 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5585 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5586 gen_rtx_CLOBBER (VOIDmode, ret))));
5587 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5588 return 1;
5589 }
5590 return 0;
5591 }
5592
5593 const char *
5594 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5595 {
5596 int divisor;
5597
5598 /* If the divisor is a constant, try to use one of the special
5599 opcodes. */
5600 if (GET_CODE (operands[0]) == CONST_INT)
5601 {
5602 static char buf[100];
5603 divisor = INTVAL (operands[0]);
5604 if (!div_milli[divisor][unsignedp])
5605 {
5606 div_milli[divisor][unsignedp] = 1;
5607 if (unsignedp)
5608 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5609 else
5610 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5611 }
5612 if (unsignedp)
5613 {
5614 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5615 INTVAL (operands[0]));
5616 return output_millicode_call (insn,
5617 gen_rtx_SYMBOL_REF (SImode, buf));
5618 }
5619 else
5620 {
5621 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5622 INTVAL (operands[0]));
5623 return output_millicode_call (insn,
5624 gen_rtx_SYMBOL_REF (SImode, buf));
5625 }
5626 }
5627 /* Divisor isn't a special constant. */
5628 else
5629 {
5630 if (unsignedp)
5631 {
5632 import_milli (divU);
5633 return output_millicode_call (insn,
5634 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5635 }
5636 else
5637 {
5638 import_milli (divI);
5639 return output_millicode_call (insn,
5640 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5641 }
5642 }
5643 }
5644
5645 /* Output a $$rem millicode to do mod. */
5646
5647 const char *
5648 output_mod_insn (int unsignedp, rtx insn)
5649 {
5650 if (unsignedp)
5651 {
5652 import_milli (remU);
5653 return output_millicode_call (insn,
5654 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5655 }
5656 else
5657 {
5658 import_milli (remI);
5659 return output_millicode_call (insn,
5660 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5661 }
5662 }
5663
5664 void
5665 output_arg_descriptor (rtx call_insn)
5666 {
5667 const char *arg_regs[4];
5668 enum machine_mode arg_mode;
5669 rtx link;
5670 int i, output_flag = 0;
5671 int regno;
5672
5673 /* We neither need nor want argument location descriptors for the
5674 64bit runtime environment or the ELF32 environment. */
5675 if (TARGET_64BIT || TARGET_ELF32)
5676 return;
5677
5678 for (i = 0; i < 4; i++)
5679 arg_regs[i] = 0;
5680
5681 /* Specify explicitly that no argument relocations should take place
5682 if using the portable runtime calling conventions. */
5683 if (TARGET_PORTABLE_RUNTIME)
5684 {
5685 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5686 asm_out_file);
5687 return;
5688 }
5689
5690 if (GET_CODE (call_insn) != CALL_INSN)
5691 abort ();
5692 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
5693 {
5694 rtx use = XEXP (link, 0);
5695
5696 if (! (GET_CODE (use) == USE
5697 && GET_CODE (XEXP (use, 0)) == REG
5698 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5699 continue;
5700
5701 arg_mode = GET_MODE (XEXP (use, 0));
5702 regno = REGNO (XEXP (use, 0));
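/* General registers %r26..%r23 hold the first four argument words
and map to descriptors ARGW0..ARGW3 respectively. */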
5703 if (regno >= 23 && regno <= 26)
5704 {
5705 arg_regs[26 - regno] = "GR";
5706 if (arg_mode == DImode)
5707 arg_regs[25 - regno] = "GR";
5708 }
5709 else if (regno >= 32 && regno <= 39)
5710 {
5711 if (arg_mode == SFmode)
5712 arg_regs[(regno - 32) / 2] = "FR";
5713 else
5714 {
5715 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5716 arg_regs[(regno - 34) / 2] = "FR";
5717 arg_regs[(regno - 34) / 2 + 1] = "FU";
5718 #else
5719 arg_regs[(regno - 34) / 2] = "FU";
5720 arg_regs[(regno - 34) / 2 + 1] = "FR";
5721 #endif
5722 }
5723 }
5724 }
5725 fputs ("\t.CALL ", asm_out_file);
5726 for (i = 0; i < 4; i++)
5727 {
5728 if (arg_regs[i])
5729 {
5730 if (output_flag++)
5731 fputc (',', asm_out_file);
5732 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5733 }
5734 }
5735 fputc ('\n', asm_out_file);
5736 }
5737 \f
5738 /* Return the class of any secondary reload register that is needed to
5739 move IN into a register in class CLASS using mode MODE.
5740
5741 Profiling has shown that this routine and its descendants account for
5742 a significant amount of compile time (~7%). So it has been
5743 optimized to reduce redundant computations and eliminate useless
5744 function calls.
5745
5746 It might be worthwhile to try to make this a leaf function too. */
5747
5748 enum reg_class
5749 secondary_reload_class (enum reg_class class, enum machine_mode mode, rtx in)
5750 {
5751 int regno, is_symbolic;
5752
5753 /* Trying to load a constant into a FP register during PIC code
5754 generation will require %r1 as a scratch register. */
5755 if (flag_pic
5756 && GET_MODE_CLASS (mode) == MODE_INT
5757 && FP_REG_CLASS_P (class)
5758 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5759 return R1_REGS;
5760
5761 /* Profiling showed the PA port spends about 1.3% of its compilation
5762 time in true_regnum from calls inside secondary_reload_class. */
5763
5764 if (GET_CODE (in) == REG)
5765 {
5766 regno = REGNO (in);
5767 if (regno >= FIRST_PSEUDO_REGISTER)
5768 regno = true_regnum (in);
5769 }
5770 else if (GET_CODE (in) == SUBREG)
5771 regno = true_regnum (in);
5772 else
5773 regno = -1;
5774
5775 /* If we have something like (mem (mem (...))), we can safely assume the
5776 inner MEM will end up in a general register after reloading, so there's
5777 no need for a secondary reload. */
5778 if (GET_CODE (in) == MEM
5779 && GET_CODE (XEXP (in, 0)) == MEM)
5780 return NO_REGS;
5781
5782 /* Handle out of range displacement for integer mode loads/stores of
5783 FP registers. */
5784 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5785 && GET_MODE_CLASS (mode) == MODE_INT
5786 && FP_REG_CLASS_P (class))
5787 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5788 return GENERAL_REGS;
5789
5790 /* A SAR<->FP register copy requires a secondary register (GPR) as
5791 well as secondary memory. */
5792 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5793 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5794 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5795 return GENERAL_REGS;
5796
5797 if (GET_CODE (in) == HIGH)
5798 in = XEXP (in, 0);
5799
5800 /* Profiling has shown that GCC spends about 2.6% of its compilation
5801 time in symbolic_operand from calls inside secondary_reload_class.
5802
5803 We use an inline copy and only compute its return value once to avoid
5804 useless work. */
5805 switch (GET_CODE (in))
5806 {
5807 rtx tmp;
5808
5809 case SYMBOL_REF:
5810 case LABEL_REF:
5811 is_symbolic = 1;
5812 break;
5813 case CONST:
5814 tmp = XEXP (in, 0);
5815 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5816 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5817 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5818 break;
5819
5820 default:
5821 is_symbolic = 0;
5822 break;
5823 }
5824
5825 if (!flag_pic
5826 && is_symbolic
5827 && read_only_operand (in, VOIDmode))
5828 return NO_REGS;
5829
5830 if (class != R1_REGS && is_symbolic)
5831 return R1_REGS;
5832
5833 return NO_REGS;
5834 }
5835
5836 enum direction
5837 function_arg_padding (enum machine_mode mode, tree type)
5838 {
5839 if (mode == BLKmode
5840 || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5841 {
5842 /* Return none if justification is not required. */
5843 if (type
5844 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5845 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5846 return none;
5847
5848 /* The directions set here are ignored when a BLKmode argument larger
5849 than a word is placed in a register. Different code is used for
5850 the stack and registers. This makes it difficult to have a
5851 consistent data representation for both the stack and registers.
5852 For both runtimes, the justification and padding for arguments on
5853 the stack and in registers should be identical. */
5854 if (TARGET_64BIT)
5855 /* The 64-bit runtime specifies left justification for aggregates. */
5856 return upward;
5857 else
5858 /* The 32-bit runtime architecture specifies right justification.
5859 When the argument is passed on the stack, the argument is padded
5860 with garbage on the left. The HP compiler pads with zeros. */
5861 return downward;
5862 }
5863
5864 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5865 return downward;
5866 else
5867 return none;
5868 }
5869
5870 \f
5871 /* Do what is necessary for `va_start'. We look at the current function
5872 to determine if stdargs or varargs is used and fill in an initial
5873 va_list. A pointer to this constructor is returned. */
5874
5875 static rtx
5876 hppa_builtin_saveregs (void)
5877 {
5878 rtx offset, dest;
5879 tree fntype = TREE_TYPE (current_function_decl);
5880 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5881 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5882 != void_type_node)))
5883 ? UNITS_PER_WORD : 0);
5884
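/* argadj is nonzero for old-style varargs functions (no prototype
ending in an ellipsis); the offset to the first anonymous argument
then needs a one-word adjustment relative to the stdarg case. */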
5885 if (argadj)
5886 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5887 else
5888 offset = current_function_arg_offset_rtx;
5889
5890 if (TARGET_64BIT)
5891 {
5892 int i, off;
5893
5894 /* Adjust for varargs/stdarg differences. */
5895 if (argadj)
5896 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5897 else
5898 offset = current_function_arg_offset_rtx;
5899
5900 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5901 from the incoming arg pointer and growing to larger addresses. */
5902 for (i = 26, off = -64; i >= 19; i--, off += 8)
5903 emit_move_insn (gen_rtx_MEM (word_mode,
5904 plus_constant (arg_pointer_rtx, off)),
5905 gen_rtx_REG (word_mode, i));
5906
5907 /* The incoming args pointer points just beyond the flushback area;
5908 normally this is not a serious concern. However, when we are doing
5909 varargs/stdargs we want to make the arg pointer point to the start
5910 of the incoming argument area. */
5911 emit_move_insn (virtual_incoming_args_rtx,
5912 plus_constant (arg_pointer_rtx, -64));
5913
5914 /* Now return a pointer to the first anonymous argument. */
5915 return copy_to_reg (expand_binop (Pmode, add_optab,
5916 virtual_incoming_args_rtx,
5917 offset, 0, 0, OPTAB_LIB_WIDEN));
5918 }
5919
5920 /* Store general registers on the stack. */
5921 dest = gen_rtx_MEM (BLKmode,
5922 plus_constant (current_function_internal_arg_pointer,
5923 -16));
5924 set_mem_alias_set (dest, get_varargs_alias_set ());
5925 set_mem_align (dest, BITS_PER_WORD);
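/* Copy the four argument registers %r23..%r26 into the block just
below the internal arg pointer. */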
5926 move_block_from_reg (23, dest, 4);
5927
5928 /* move_block_from_reg will emit code to store the argument registers
5929 individually as scalar stores.
5930
5931 However, other insns may later load from the same addresses for
5932 a structure load (passing a struct to a varargs routine).
5933
5934 The alias code assumes that such aliasing can never happen, so we
5935 have to keep memory referencing insns from moving up beyond the
5936 last argument register store. So we emit a blockage insn here. */
5937 emit_insn (gen_blockage ());
5938
5939 return copy_to_reg (expand_binop (Pmode, add_optab,
5940 current_function_internal_arg_pointer,
5941 offset, 0, 0, OPTAB_LIB_WIDEN));
5942 }
5943
5944 void
5945 hppa_va_start (tree valist, rtx nextarg)
5946 {
5947 nextarg = expand_builtin_saveregs ();
5948 std_expand_builtin_va_start (valist, nextarg);
5949 }
5950
5951 rtx
5952 hppa_va_arg (tree valist, tree type)
5953 {
5954 HOST_WIDE_INT size = int_size_in_bytes (type);
5955 HOST_WIDE_INT ofs;
5956 tree t, ptr, pptr;
5957
5958 if (TARGET_64BIT)
5959 {
5960 /* Every argument in PA64 is supposed to be passed by value
5961 (including large structs). However, as a GCC extension, we
5962 pass zero-sized and variable-sized arguments by reference. Empty
5963 structures are a GCC extension not supported by the HP
5964 compilers. Thus, passing them by reference isn't likely
5965 to conflict with the ABI. For variable sized arguments,
5966 GCC doesn't have the infrastructure to allocate these to
5967 registers. */
5968
5969 /* Arguments with a size greater than 8 must be aligned 0 MOD 16. */
5970
5971 if (size > UNITS_PER_WORD)
5972 {
5973 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5974 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
5975 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
5976 build_int_2 (-2 * UNITS_PER_WORD, -1));
5977 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5978 TREE_SIDE_EFFECTS (t) = 1;
5979 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5980 }
5981
5982 if (size > 0)
5983 return std_expand_builtin_va_arg (valist, type);
5984 else
5985 {
5986 ptr = build_pointer_type (type);
5987
5988 /* Args grow upward. */
5989 t = build (POSTINCREMENT_EXPR, TREE_TYPE (valist), valist,
5990 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5991 TREE_SIDE_EFFECTS (t) = 1;
5992
5993 pptr = build_pointer_type (ptr);
5994 t = build1 (NOP_EXPR, pptr, t);
5995 TREE_SIDE_EFFECTS (t) = 1;
5996
5997 t = build1 (INDIRECT_REF, ptr, t);
5998 TREE_SIDE_EFFECTS (t) = 1;
5999 }
6000 }
6001 else /* !TARGET_64BIT */
6002 {
6003 ptr = build_pointer_type (type);
6004
6005 /* "Large" and variable sized types are passed by reference. */
6006 if (size > 8 || size <= 0)
6007 {
6008 /* Args grow downward. */
6009 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
6010 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
6011 TREE_SIDE_EFFECTS (t) = 1;
6012
6013 pptr = build_pointer_type (ptr);
6014 t = build1 (NOP_EXPR, pptr, t);
6015 TREE_SIDE_EFFECTS (t) = 1;
6016
6017 t = build1 (INDIRECT_REF, ptr, t);
6018 TREE_SIDE_EFFECTS (t) = 1;
6019 }
6020 else
6021 {
6022 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
6023 build_int_2 (-size, -1));
6024
6025 /* Copied from va-pa.h, but we probably don't need to align to
6026 word size, since we generate and preserve that invariant. */
6027 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
6028 build_int_2 ((size > 4 ? -8 : -4), -1));
6029
6030 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6031 TREE_SIDE_EFFECTS (t) = 1;
6032
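/* Small arguments are right justified in their word or doubleword
slot, so step past the leading pad bytes to reach the object. */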
6033 ofs = (8 - size) % 4;
6034 if (ofs)
6035 {
6036 t = build (PLUS_EXPR, TREE_TYPE (valist), t,
6037 build_int_2 (ofs, 0));
6038 TREE_SIDE_EFFECTS (t) = 1;
6039 }
6040
6041 t = build1 (NOP_EXPR, ptr, t);
6042 TREE_SIDE_EFFECTS (t) = 1;
6043 }
6044 }
6045
6046 /* Calculate! */
6047 return expand_expr (t, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6048 }
6049
6050
6051
6052 /* This routine handles all the normal conditional branch sequences we
6053 might need to generate. It handles compare immediate vs compare
6054 register, nullification of delay slots, varying length branches,
6055 negated branches, and all combinations of the above. It returns the
6056 output template appropriate for the branch described by the given
6057 parameters. */
6058
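/* LENGTH is the attribute length of the branch in bytes: 4 for a short
branch, 8 for a long branch, and 20 or 28 for sequences that must
fall back on output_lbranch. */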
6059 const char *
6060 output_cbranch (rtx *operands, int nullify, int length, int negated, rtx insn)
6061 {
6062 static char buf[100];
6063 int useskip = 0;
6064 rtx xoperands[5];
6065
6066 /* A conditional branch to the following instruction (e.g., the delay slot)
6067 is asking for a disaster. This can happen when not optimizing and
6068 when jump optimization fails.
6069
6070 While it is usually safe to emit nothing, this can fail if the
6071 preceding instruction is a nullified branch with an empty delay
6072 slot and the same branch target as this branch. We could check
6073 for this but jump optimization should eliminate nop jumps. It
6074 is always safe to emit a nop. */
6075 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6076 return "nop";
6077
6078 /* The doubleword form of the cmpib instruction doesn't have the LEU
6079 and GTU conditions while the cmpb instruction does. Since we accept
6080 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6081 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6082 operands[2] = gen_rtx_REG (DImode, 0);
6083
6084 /* If this is a long branch with its delay slot unfilled, set `nullify'
6085 as it can nullify the delay slot and save a nop. */
6086 if (length == 8 && dbr_sequence_length () == 0)
6087 nullify = 1;
6088
6089 /* If this is a short forward conditional branch which did not get
6090 its delay slot filled, the delay slot can still be nullified. */
6091 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6092 nullify = forward_branch_p (insn);
6093
6094 /* A forward branch over a single nullified insn can be done with a
6095 comclr instruction. This avoids a single cycle penalty due to
6096 mis-predicted branch if we fall through (branch not taken). */
6097 if (length == 4
6098 && next_real_insn (insn) != 0
6099 && get_attr_length (next_real_insn (insn)) == 4
6100 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6101 && nullify)
6102 useskip = 1;
6103
6104 switch (length)
6105 {
6106 /* All short conditional branches except backwards with an unfilled
6107 delay slot. */
6108 case 4:
6109 if (useskip)
6110 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6111 else
6112 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6113 if (GET_MODE (operands[1]) == DImode)
6114 strcat (buf, "*");
6115 if (negated)
6116 strcat (buf, "%B3");
6117 else
6118 strcat (buf, "%S3");
6119 if (useskip)
6120 strcat (buf, " %2,%r1,%%r0");
6121 else if (nullify)
6122 strcat (buf, ",n %2,%r1,%0");
6123 else
6124 strcat (buf, " %2,%r1,%0");
6125 break;
6126
6127 /* All long conditionals. Note a short backward branch with an
6128 unfilled delay slot is treated just like a long backward branch
6129 with an unfilled delay slot. */
6130 case 8:
6131 /* Handle weird backwards branch with a filled delay slot
6132 which is nullified. */
6133 if (dbr_sequence_length () != 0
6134 && ! forward_branch_p (insn)
6135 && nullify)
6136 {
6137 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6138 if (GET_MODE (operands[1]) == DImode)
6139 strcat (buf, "*");
6140 if (negated)
6141 strcat (buf, "%S3");
6142 else
6143 strcat (buf, "%B3");
6144 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6145 }
6146 /* Handle short backwards branch with an unfilled delay slot.
6147 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6148 taken and untaken branches. */
6149 else if (dbr_sequence_length () == 0
6150 && ! forward_branch_p (insn)
6151 && INSN_ADDRESSES_SET_P ()
6152 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6153 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6154 {
6155 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6156 if (GET_MODE (operands[1]) == DImode)
6157 strcat (buf, "*");
6158 if (negated)
6159 strcat (buf, "%B3 %2,%r1,%0%#");
6160 else
6161 strcat (buf, "%S3 %2,%r1,%0%#");
6162 }
6163 else
6164 {
6165 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6166 if (GET_MODE (operands[1]) == DImode)
6167 strcat (buf, "*");
6168 if (negated)
6169 strcat (buf, "%S3");
6170 else
6171 strcat (buf, "%B3");
6172 if (nullify)
6173 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6174 else
6175 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6176 }
6177 break;
6178
6179 case 20:
6180 case 28:
6181 xoperands[0] = operands[0];
6182 xoperands[1] = operands[1];
6183 xoperands[2] = operands[2];
6184 xoperands[3] = operands[3];
6185
6186 /* The reversed conditional branch must branch over one additional
6187 instruction if the delay slot is filled. If the delay slot
6188 is empty, the instruction after the reversed conditional branch
6189 must be nullified. */
6190 nullify = dbr_sequence_length () == 0;
6191 xoperands[4] = nullify ? GEN_INT (length) : GEN_INT (length + 4);
6192
6193 /* Create a reversed conditional branch which branches around
6194 the following insns. */
6195 if (GET_MODE (operands[1]) != DImode)
6196 {
6197 if (nullify)
6198 {
6199 if (negated)
6200 strcpy (buf,
6201 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6202 else
6203 strcpy (buf,
6204 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6205 }
6206 else
6207 {
6208 if (negated)
6209 strcpy (buf,
6210 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6211 else
6212 strcpy (buf,
6213 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6214 }
6215 }
6216 else
6217 {
6218 if (nullify)
6219 {
6220 if (negated)
6221 strcpy (buf,
6222 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6223 else
6224 strcpy (buf,
6225 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6226 }
6227 else
6228 {
6229 if (negated)
6230 strcpy (buf,
6231 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6232 else
6233 strcpy (buf,
6234 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6235 }
6236 }
6237
6238 output_asm_insn (buf, xoperands);
6239 return output_lbranch (operands[0], insn);
6240
6241 default:
6242 abort ();
6243 }
6244 return buf;
6245 }
6246
6247 /* This routine handles long unconditional branches that exceed the
6248 maximum range of a simple branch instruction. */
6249
6250 const char *
6251 output_lbranch (rtx dest, rtx insn)
6252 {
6253 rtx xoperands[2];
6254
6255 xoperands[0] = dest;
6256
6257 /* First, free up the delay slot. */
6258 if (dbr_sequence_length () != 0)
6259 {
6260 /* We can't handle a jump in the delay slot. */
6261 if (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN)
6262 abort ();
6263
6264 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6265 optimize, 0, 0, NULL);
6266
6267 /* Now delete the delay insn. */
6268 PUT_CODE (NEXT_INSN (insn), NOTE);
6269 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6270 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6271 }
6272
6273 /* Output an insn to save %r1. The runtime documentation doesn't
6274 specify whether the "Clean Up" slot in the caller's frame can
6275 be clobbered by the callee. It isn't copied by HP's builtin
6276 alloca, so this suggests that it can be clobbered if necessary.
6277 The "Static Link" location is copied by HP builtin alloca, so
6278 we avoid using it. Using the cleanup slot might be a problem
6279 if we have to interoperate with languages that pass cleanup
6280 information. However, it should be possible to handle these
6281 situations with GCC's asm feature.
6282
6283 The "Current RP" slot is reserved for the called procedure, so
6284 we try to use it when we don't have a frame of our own. It's
6285 rather unlikely that we won't have a frame when we need to emit
6286 a very long branch.
6287
6288 Really the way to go long term is a register scavenger; go to
6289 the target of the jump and find a register which we can use
6290 as a scratch to hold the value in %r1. Then, we wouldn't have
6291 to free up the delay slot or clobber a slot that may be needed
6292 for other purposes. */
6293 if (TARGET_64BIT)
6294 {
6295 if (actual_fsize == 0 && !regs_ever_live[2])
6296 /* Use the return pointer slot in the frame marker. */
6297 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6298 else
6299 /* Use the slot at -40 in the frame marker since HP builtin
6300 alloca doesn't copy it. */
6301 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6302 }
6303 else
6304 {
6305 if (actual_fsize == 0 && !regs_ever_live[2])
6306 /* Use the return pointer slot in the frame marker. */
6307 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6308 else
6309 /* Use the "Clean Up" slot in the frame marker. In GCC,
6310 the only other use of this location is for copying a
6311 floating point double argument from a floating-point
6312 register to two general registers. The copy is done
6313 as an "atomic" operation when outputting a call, so it
6314 won't interfere with our using the location here. */
6315 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6316 }
6317
6318 if (TARGET_PORTABLE_RUNTIME)
6319 {
6320 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6321 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6322 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6323 }
6324 else if (flag_pic)
6325 {
6326 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6327 if (TARGET_SOM || !TARGET_GAS)
6328 {
6329 xoperands[1] = gen_label_rtx ();
6330 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6331 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6332 CODE_LABEL_NUMBER (xoperands[1]));
6333 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6334 }
6335 else
6336 {
6337 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6338 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6339 }
6340 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6341 }
6342 else
6343 /* Now output a very long branch to the original target. */
6344 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6345
6346 /* Now restore the value of %r1 in the delay slot. */
6347 if (TARGET_64BIT)
6348 {
6349 if (actual_fsize == 0 && !regs_ever_live[2])
6350 return "ldd -16(%%r30),%%r1";
6351 else
6352 return "ldd -40(%%r30),%%r1";
6353 }
6354 else
6355 {
6356 if (actual_fsize == 0 && !regs_ever_live[2])
6357 return "ldw -20(%%r30),%%r1";
6358 else
6359 return "ldw -12(%%r30),%%r1";
6360 }
6361 }
6362
6363 /* This routine handles all the branch-on-bit conditional branch sequences we
6364 might need to generate. It handles nullification of delay slots,
6365 varying length branches, negated branches and all combinations of the
6366 above. It returns the appropriate output template to emit the branch. */
6367
6368 const char *
6369 output_bb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6370 int negated, rtx insn, int which)
6371 {
6372 static char buf[100];
6373 int useskip = 0;
6374
6375 /* A conditional branch to the following instruction (e.g., the delay slot) is
6376 asking for a disaster. I do not think this can happen as this pattern
6377 is only used when optimizing; jump optimization should eliminate the
6378 jump. But be prepared just in case. */
6379
6380 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6381 return "nop";
6382
6383 /* If this is a long branch with its delay slot unfilled, set `nullify'
6384 as it can nullify the delay slot and save a nop. */
6385 if (length == 8 && dbr_sequence_length () == 0)
6386 nullify = 1;
6387
6388 /* If this is a short forward conditional branch which did not get
6389 its delay slot filled, the delay slot can still be nullified. */
6390 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6391 nullify = forward_branch_p (insn);
6392
6393 /* A forward branch over a single nullified insn can be done with an
6394 extrs instruction. This avoids a single cycle penalty due to
6395 mis-predicted branch if we fall through (branch not taken). */
6396
6397 if (length == 4
6398 && next_real_insn (insn) != 0
6399 && get_attr_length (next_real_insn (insn)) == 4
6400 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6401 && nullify)
6402 useskip = 1;
6403
6404 switch (length)
6405 {
6406
6407 /* All short conditional branches except backwards with an unfilled
6408 delay slot. */
6409 case 4:
6410 if (useskip)
6411 strcpy (buf, "{extrs,|extrw,s,}");
6412 else
6413 strcpy (buf, "bb,");
6414 if (useskip && GET_MODE (operands[0]) == DImode)
6415 strcpy (buf, "extrd,s,*");
6416 else if (GET_MODE (operands[0]) == DImode)
6417 strcpy (buf, "bb,*");
6418 if ((which == 0 && negated)
6419 || (which == 1 && ! negated))
6420 strcat (buf, ">=");
6421 else
6422 strcat (buf, "<");
6423 if (useskip)
6424 strcat (buf, " %0,%1,1,%%r0");
6425 else if (nullify && negated)
6426 strcat (buf, ",n %0,%1,%3");
6427 else if (nullify && ! negated)
6428 strcat (buf, ",n %0,%1,%2");
6429 else if (! nullify && negated)
6430 strcat (buf, "%0,%1,%3");
6431 else if (! nullify && ! negated)
6432 strcat (buf, " %0,%1,%2");
6433 break;
6434
6435 /* All long conditionals. Note a short backward branch with an
6436 unfilled delay slot is treated just like a long backward branch
6437 with an unfilled delay slot. */
6438 case 8:
6439 /* Handle weird backwards branch with a filled delay slot
6440 which is nullified. */
6441 if (dbr_sequence_length () != 0
6442 && ! forward_branch_p (insn)
6443 && nullify)
6444 {
6445 strcpy (buf, "bb,");
6446 if (GET_MODE (operands[0]) == DImode)
6447 strcat (buf, "*");
6448 if ((which == 0 && negated)
6449 || (which == 1 && ! negated))
6450 strcat (buf, "<");
6451 else
6452 strcat (buf, ">=");
6453 if (negated)
6454 strcat (buf, ",n %0,%1,.+12\n\tb %3");
6455 else
6456 strcat (buf, ",n %0,%1,.+12\n\tb %2");
6457 }
6458 /* Handle short backwards branch with an unfilled delay slot.
6459 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6460 taken and untaken branches. */
6461 else if (dbr_sequence_length () == 0
6462 && ! forward_branch_p (insn)
6463 && INSN_ADDRESSES_SET_P ()
6464 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6465 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6466 {
6467 strcpy (buf, "bb,");
6468 if (GET_MODE (operands[0]) == DImode)
6469 strcat (buf, "*");
6470 if ((which == 0 && negated)
6471 || (which == 1 && ! negated))
6472 strcat (buf, ">=");
6473 else
6474 strcat (buf, "<");
6475 if (negated)
6476 strcat (buf, " %0,%1,%3%#");
6477 else
6478 strcat (buf, " %0,%1,%2%#");
6479 }
6480 else
6481 {
6482 strcpy (buf, "{extrs,|extrw,s,}");
6483 if (GET_MODE (operands[0]) == DImode)
6484 strcpy (buf, "extrd,s,*");
6485 if ((which == 0 && negated)
6486 || (which == 1 && ! negated))
6487 strcat (buf, "<");
6488 else
6489 strcat (buf, ">=");
6490 if (nullify && negated)
6491 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6492 else if (nullify && ! negated)
6493 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6494 else if (negated)
6495 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6496 else
6497 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6498 }
6499 break;
6500
6501 default:
6502 abort ();
6503 }
6504 return buf;
6505 }
6506
6507 /* This routine handles all the branch-on-variable-bit conditional branch
6508 sequences we might need to generate. It handles nullification of delay
6509 slots, varying length branches, negated branches and all combinations
6510 of the above. It returns the appropriate output template to emit the
6511 branch. */
6512
6513 const char *
6514 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int nullify, int length,
6515 int negated, rtx insn, int which)
6516 {
6517 static char buf[100];
6518 int useskip = 0;
6519
6520 /* A conditional branch to the following instruction (e.g., the delay slot) is
6521 asking for a disaster. I do not think this can happen as this pattern
6522 is only used when optimizing; jump optimization should eliminate the
6523 jump. But be prepared just in case. */
6524
6525 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6526 return "nop";
6527
6528 /* If this is a long branch with its delay slot unfilled, set `nullify'
6529 as it can nullify the delay slot and save a nop. */
6530 if (length == 8 && dbr_sequence_length () == 0)
6531 nullify = 1;
6532
6533 /* If this is a short forward conditional branch which did not get
6534 its delay slot filled, the delay slot can still be nullified. */
6535 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6536 nullify = forward_branch_p (insn);
6537
6538 /* A forward branch over a single nullified insn can be done with an
6539 extrs instruction. This avoids a single cycle penalty due to
6540 mis-predicted branch if we fall through (branch not taken). */
6541
6542 if (length == 4
6543 && next_real_insn (insn) != 0
6544 && get_attr_length (next_real_insn (insn)) == 4
6545 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6546 && nullify)
6547 useskip = 1;
6548
6549 switch (length)
6550 {
6551
6552 /* All short conditional branches except backwards with an unfilled
6553 delay slot. */
6554 case 4:
6555 if (useskip)
6556 strcpy (buf, "{vextrs,|extrw,s,}");
6557 else
6558 strcpy (buf, "{bvb,|bb,}");
6559 if (useskip && GET_MODE (operands[0]) == DImode)
6560 strcpy (buf, "extrd,s,*");
6561 else if (GET_MODE (operands[0]) == DImode)
6562 strcpy (buf, "bb,*");
6563 if ((which == 0 && negated)
6564 || (which == 1 && ! negated))
6565 strcat (buf, ">=");
6566 else
6567 strcat (buf, "<");
6568 if (useskip)
6569 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6570 else if (nullify && negated)
6571 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6572 else if (nullify && ! negated)
6573 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6574 else if (! nullify && negated)
6575 strcat (buf, "{%0,%3|%0,%%sar,%3}");
6576 else if (! nullify && ! negated)
6577 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6578 break;
6579
6580 /* All long conditionals. Note a short backward branch with an
6581 unfilled delay slot is treated just like a long backward branch
6582 with an unfilled delay slot. */
6583 case 8:
6584 /* Handle weird backwards branch with a filled delay slot
6585 which is nullified. */
6586 if (dbr_sequence_length () != 0
6587 && ! forward_branch_p (insn)
6588 && nullify)
6589 {
6590 strcpy (buf, "{bvb,|bb,}");
6591 if (GET_MODE (operands[0]) == DImode)
6592 strcat (buf, "*");
6593 if ((which == 0 && negated)
6594 || (which == 1 && ! negated))
6595 strcat (buf, "<");
6596 else
6597 strcat (buf, ">=");
6598 if (negated)
6599 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6600 else
6601 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6602 }
6603 /* Handle short backwards branch with an unfilled delay slot.
6604 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6605 taken and untaken branches. */
6606 else if (dbr_sequence_length () == 0
6607 && ! forward_branch_p (insn)
6608 && INSN_ADDRESSES_SET_P ()
6609 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6610 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6611 {
6612 strcpy (buf, "{bvb,|bb,}");
6613 if (GET_MODE (operands[0]) == DImode)
6614 strcat (buf, "*");
6615 if ((which == 0 && negated)
6616 || (which == 1 && ! negated))
6617 strcat (buf, ">=");
6618 else
6619 strcat (buf, "<");
6620 if (negated)
6621 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6622 else
6623 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6624 }
6625 else
6626 {
6627 strcpy (buf, "{vextrs,|extrw,s,}");
6628 if (GET_MODE (operands[0]) == DImode)
6629 strcpy (buf, "extrd,s,*");
6630 if ((which == 0 && negated)
6631 || (which == 1 && ! negated))
6632 strcat (buf, "<");
6633 else
6634 strcat (buf, ">=");
6635 if (nullify && negated)
6636 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6637 else if (nullify && ! negated)
6638 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6639 else if (negated)
6640 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6641 else
6642 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6643 }
6644 break;
6645
6646 default:
6647 abort ();
6648 }
6649 return buf;
6650 }
6651
6652 /* Return the output template for emitting a dbra type insn.
6653
6654 Note it may perform some output operations on its own before
6655 returning the final output string. */
6656 const char *
6657 output_dbra (rtx *operands, rtx insn, int which_alternative)
6658 {
6659
6660 /* A conditional branch to the following instruction (e.g., the delay slot) is
6661 asking for a disaster. Be prepared! */
6662
6663 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6664 {
6665 if (which_alternative == 0)
6666 return "ldo %1(%0),%0";
6667 else if (which_alternative == 1)
6668 {
6669 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6670 output_asm_insn ("ldw -16(%%r30),%4", operands);
6671 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6672 return "{fldws|fldw} -16(%%r30),%0";
6673 }
6674 else
6675 {
6676 output_asm_insn ("ldw %0,%4", operands);
6677 return "ldo %1(%4),%4\n\tstw %4,%0";
6678 }
6679 }
6680
6681 if (which_alternative == 0)
6682 {
6683 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6684 int length = get_attr_length (insn);
6685
6686 /* If this is a long branch with its delay slot unfilled, set `nullify'
6687 as it can nullify the delay slot and save a nop. */
6688 if (length == 8 && dbr_sequence_length () == 0)
6689 nullify = 1;
6690
6691 /* If this is a short forward conditional branch which did not get
6692 its delay slot filled, the delay slot can still be nullified. */
6693 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6694 nullify = forward_branch_p (insn);
6695
6696 /* Handle short versions first. */
6697 if (length == 4 && nullify)
6698 return "addib,%C2,n %1,%0,%3";
6699 else if (length == 4 && ! nullify)
6700 return "addib,%C2 %1,%0,%3";
6701 else if (length == 8)
6702 {
6703 /* Handle weird backwards branch with a filled delay slot
6704 which is nullified. */
6705 if (dbr_sequence_length () != 0
6706 && ! forward_branch_p (insn)
6707 && nullify)
6708 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6709 /* Handle short backwards branch with an unfilled delay slot.
6710 Using an addb;nop rather than addi;bl saves 1 cycle for both
6711 taken and untaken branches. */
6712 else if (dbr_sequence_length () == 0
6713 && ! forward_branch_p (insn)
6714 && INSN_ADDRESSES_SET_P ()
6715 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6716 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6717 return "addib,%C2 %1,%0,%3%#";
6718
6719 /* Handle normal cases. */
6720 if (nullify)
6721 return "addi,%N2 %1,%0,%0\n\tb,n %3";
6722 else
6723 return "addi,%N2 %1,%0,%0\n\tb %3";
6724 }
6725 else
6726 abort ();
6727 }
6728 /* Deal with gross reload from FP register case. */
6729 else if (which_alternative == 1)
6730 {
6731 /* Move loop counter from FP register to MEM then into a GR,
6732 increment the GR, store the GR into MEM, and finally reload
6733 the FP register from MEM from within the branch's delay slot. */
6734 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6735 operands);
6736 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6737 if (get_attr_length (insn) == 24)
6738 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6739 else
6740 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6741 }
6742 /* Deal with gross reload from memory case. */
6743 else
6744 {
6745 /* Reload loop counter from memory, the store back to memory
6746 happens in the branch's delay slot. */
6747 output_asm_insn ("ldw %0,%4", operands);
6748 if (get_attr_length (insn) == 12)
6749 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6750 else
6751 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6752 }
6753 }
6754
6755 /* Return the output template for emitting a movb type insn.
6756
6757 Note it may perform some output operations on its own before
6758 returning the final output string. */
6759 const char *
6760 output_movb (rtx *operands, rtx insn, int which_alternative,
6761 int reverse_comparison)
6762 {
6763
6764 /* A conditional branch to the following instruction (e.g., the delay slot) is
6765 asking for a disaster. Be prepared! */
6766
6767 if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6768 {
6769 if (which_alternative == 0)
6770 return "copy %1,%0";
6771 else if (which_alternative == 1)
6772 {
6773 output_asm_insn ("stw %1,-16(%%r30)", operands);
6774 return "{fldws|fldw} -16(%%r30),%0";
6775 }
6776 else if (which_alternative == 2)
6777 return "stw %1,%0";
6778 else
6779 return "mtsar %r1";
6780 }
6781
6782 /* Support the second variant. */
6783 if (reverse_comparison)
6784 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6785
6786 if (which_alternative == 0)
6787 {
6788 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6789 int length = get_attr_length (insn);
6790
6791 /* If this is a long branch with its delay slot unfilled, set `nullify'
6792 as it can nullify the delay slot and save a nop. */
6793 if (length == 8 && dbr_sequence_length () == 0)
6794 nullify = 1;
6795
6796 /* If this is a short forward conditional branch which did not get
6797 its delay slot filled, the delay slot can still be nullified. */
6798 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6799 nullify = forward_branch_p (insn);
6800
6801 /* Handle short versions first. */
6802 if (length == 4 && nullify)
6803 return "movb,%C2,n %1,%0,%3";
6804 else if (length == 4 && ! nullify)
6805 return "movb,%C2 %1,%0,%3";
6806 else if (length == 8)
6807 {
6808 /* Handle weird backwards branch with a filled delay slot
6809 which is nullified. */
6810 if (dbr_sequence_length () != 0
6811 && ! forward_branch_p (insn)
6812 && nullify)
6813 return "movb,%N2,n %1,%0,.+12\n\tb %3";
6814
6815 /* Handle short backwards branch with an unfilled delay slot.
6816 Using a movb;nop rather than or;bl saves 1 cycle for both
6817 taken and untaken branches. */
6818 else if (dbr_sequence_length () == 0
6819 && ! forward_branch_p (insn)
6820 && INSN_ADDRESSES_SET_P ()
6821 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6822 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6823 return "movb,%C2 %1,%0,%3%#";
6824 /* Handle normal cases. */
6825 if (nullify)
6826 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6827 else
6828 return "or,%N2 %1,%%r0,%0\n\tb %3";
6829 }
6830 else
6831 abort ();
6832 }
6833 /* Deal with gross reload from FP register case. */
6834 else if (which_alternative == 1)
6835 {
6836 /* Move the loop counter from a general register to MEM, then
6837 reload the FP register from MEM from within the branch's
6838 delay slot. */
6839 output_asm_insn ("stw %1,-16(%%r30)", operands);
6840 if (get_attr_length (insn) == 12)
6841 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6842 else
6843 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6844 }
6845 /* Deal with gross reload from memory case. */
6846 else if (which_alternative == 2)
6847 {
6848 /* Reload loop counter from memory, the store back to memory
6849 happens in the branch's delay slot. */
6850 if (get_attr_length (insn) == 8)
6851 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6852 else
6853 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6854 }
6855 /* Handle SAR as a destination. */
6856 else
6857 {
6858 if (get_attr_length (insn) == 8)
6859 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6860 else
6861 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
6862 }
6863 }
6864
6865 /* Copy any FP arguments in INSN into integer registers. */
6866 static void
6867 copy_fp_args (rtx insn)
6868 {
6869 rtx link;
6870 rtx xoperands[2];
6871
6872 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6873 {
6874 int arg_mode, regno;
6875 rtx use = XEXP (link, 0);
6876
6877 if (! (GET_CODE (use) == USE
6878 && GET_CODE (XEXP (use, 0)) == REG
6879 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6880 continue;
6881
6882 arg_mode = GET_MODE (XEXP (use, 0));
6883 regno = REGNO (XEXP (use, 0));
6884
6885 /* Is it a floating point register? */
6886 if (regno >= 32 && regno <= 39)
6887 {
6888 /* Copy the FP register into an integer register via memory. */
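/* FP argument registers %fr4..%fr7 (regnos 32..39, counting both
halves) correspond to general argument registers %r26..%r23. */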
6889 if (arg_mode == SFmode)
6890 {
6891 xoperands[0] = XEXP (use, 0);
6892 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6893 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6894 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6895 }
6896 else
6897 {
6898 xoperands[0] = XEXP (use, 0);
6899 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6900 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6901 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6902 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6903 }
6904 }
6905 }
6906 }
6907
6908 /* Compute length of the FP argument copy sequence for INSN. */
6909 static int
6910 length_fp_args (rtx insn)
6911 {
6912 int length = 0;
6913 rtx link;
6914
6915 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6916 {
6917 int arg_mode, regno;
6918 rtx use = XEXP (link, 0);
6919
6920 if (! (GET_CODE (use) == USE
6921 && GET_CODE (XEXP (use, 0)) == REG
6922 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6923 continue;
6924
6925 arg_mode = GET_MODE (XEXP (use, 0));
6926 regno = REGNO (XEXP (use, 0));
6927
6928 /* Is it a floating point register? */
6929 if (regno >= 32 && regno <= 39)
6930 {
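/* An SFmode copy takes two insns (fstw + ldw) and a DFmode copy
three (fstd + two ldws); see copy_fp_args above. */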
6931 if (arg_mode == SFmode)
6932 length += 8;
6933 else
6934 length += 12;
6935 }
6936 }
6937
6938 return length;
6939 }
6940
6941 /* Return the attribute length for the millicode call instruction INSN.
6942 The length must match the code generated by output_millicode_call.
6943 We include the delay slot in the returned length as it is better to
6944 overestimate the length than to underestimate it. */
6945
6946 int
6947 attr_length_millicode_call (rtx insn)
6948 {
6949 unsigned long distance = -1;
6950 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
6951
6952 if (INSN_ADDRESSES_SET_P ())
6953 {
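/* Estimate the worst-case distance to the millicode routine; if
the unsigned sum wraps, fall back to the maximum distance so a
long call sequence is used. */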
6954 distance = (total + insn_current_reference_address (insn));
6955 if (distance < total)
6956 distance = -1;
6957 }
6958
6959 if (TARGET_64BIT)
6960 {
6961 if (!TARGET_LONG_CALLS && distance < 7600000)
6962 return 8;
6963
6964 return 20;
6965 }
6966 else if (TARGET_PORTABLE_RUNTIME)
6967 return 24;
6968 else
6969 {
6970 if (!TARGET_LONG_CALLS && distance < 240000)
6971 return 8;
6972
6973 if (TARGET_LONG_ABS_CALL && !flag_pic)
6974 return 12;
6975
6976 return 24;
6977 }
6978 }
6979
6980 /* INSN is a function call. It may have an unconditional jump
6981 in its delay slot.
6982
6983 CALL_DEST is the routine we are calling. */
6984
6985 const char *
6986 output_millicode_call (rtx insn, rtx call_dest)
6987 {
6988 int attr_length = get_attr_length (insn);
6989 int seq_length = dbr_sequence_length ();
6990 int distance;
6991 rtx seq_insn;
6992 rtx xoperands[3];
6993
6994 xoperands[0] = call_dest;
6995 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6996
6997 /* Handle the common case where we are sure that the branch will
6998 reach the beginning of the $CODE$ subspace. The within reach
6999 form of the $$sh_func_adrs call has a length of 28. Because
7000 it has an attribute type of multi, it never has a nonzero
7001 sequence length. The length of the $$sh_func_adrs is the same
7002 as certain out of reach PIC calls to other routines. */
7003 if (!TARGET_LONG_CALLS
7004 && ((seq_length == 0
7005 && (attr_length == 12
7006 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7007 || (seq_length != 0 && attr_length == 8)))
7008 {
7009 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7010 }
7011 else
7012 {
7013 if (TARGET_64BIT)
7014 {
7015 /* It might seem that one insn could be saved by accessing
7016 the millicode function using the linkage table. However,
7017 this doesn't work in shared libraries and other dynamically
7018 loaded objects. Using a pc-relative sequence also avoids
7019 problems related to the implicit use of the gp register. */
7020 output_asm_insn ("b,l .+8,%%r1", xoperands);
7021
7022 if (TARGET_GAS)
7023 {
7024 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7025 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7026 }
7027 else
7028 {
7029 xoperands[1] = gen_label_rtx ();
7030 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7031 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7032 CODE_LABEL_NUMBER (xoperands[1]));
7033 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7034 }
7035
7036 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7037 }
7038 else if (TARGET_PORTABLE_RUNTIME)
7039 {
7040 /* Pure portable runtime doesn't allow be/ble; we also don't
7041 have PIC support in the assembler/linker, so this sequence
7042 is needed. */
7043
7044 /* Get the address of our target into %r1. */
7045 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7046 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7047
7048 /* Get our return address into %r31. */
7049 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7050 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7051
7052 /* Jump to our target address in %r1. */
7053 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7054 }
7055 else if (!flag_pic)
7056 {
7057 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7058 if (TARGET_PA_20)
7059 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7060 else
7061 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7062 }
7063 else
7064 {
7065 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7066 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7067
7068 if (TARGET_SOM || !TARGET_GAS)
7069 {
7070 /* The HP assembler can generate relocations for the
7071 difference of two symbols. GAS can do this for a
7072 millicode symbol but not an arbitrary external
7073 symbol when generating SOM output. */
7074 xoperands[1] = gen_label_rtx ();
7075 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7076 CODE_LABEL_NUMBER (xoperands[1]));
7077 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7078 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7079 }
7080 else
7081 {
7082 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7083 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7084 xoperands);
7085 }
7086
7087 /* Jump to our target address in %r1. */
7088 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7089 }
7090 }
7091
7092 if (seq_length == 0)
7093 output_asm_insn ("nop", xoperands);
7094
7095 /* We are done if there isn't a jump in the delay slot. */
7096 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7097 return "";
7098
7099 /* This call has an unconditional jump in its delay slot. */
7100 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7101
7102 /* See if the return address can be adjusted. Use the containing
7103 sequence insn's address. */
7104 if (INSN_ADDRESSES_SET_P ())
7105 {
7106 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7107 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7108 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7109
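/* When the jump target is within reach of a 14-bit displacement,
bump the return pointer past the jump so the millicode routine
returns directly to the jump's target. */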
7110 if (VAL_14_BITS_P (distance))
7111 {
7112 xoperands[1] = gen_label_rtx ();
7113 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7114 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7115 CODE_LABEL_NUMBER (xoperands[1]));
7116 }
7117 else
7118 /* ??? This branch may not reach its target. */
7119 output_asm_insn ("nop\n\tb,n %0", xoperands);
7120 }
7121 else
7122 /* ??? This branch may not reach its target. */
7123 output_asm_insn ("nop\n\tb,n %0", xoperands);
7124
7125 /* Delete the jump. */
7126 PUT_CODE (NEXT_INSN (insn), NOTE);
7127 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7128 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7129
7130 return "";
7131 }
7132
7133 /* Return the attribute length of the call instruction INSN. The SIBCALL
7134 flag indicates whether INSN is a regular call or a sibling call. The
7135 length returned must be longer than the code actually generated by
7136 output_call. Since branch shortening is done before delay branch
7137 sequencing, there is no way to determine whether or not the delay
7138 slot will be filled during branch shortening. Even when the delay
7139 slot is filled, we may have to add a nop if the delay slot contains
7140 a branch that can't reach its target. Thus, we always have to include
7141 the delay slot in the length estimate. This used to be done in
7142 pa_adjust_insn_length but we do it here now as some sequences always
7143 fill the delay slot and we can save four bytes in the estimate for
7144 these sequences. */
7145
7146 int
7147 attr_length_call (rtx insn, int sibcall)
7148 {
7149 int local_call;
7150 rtx call_dest;
7151 tree call_decl;
7152 int length = 0;
7153 rtx pat = PATTERN (insn);
7154 unsigned long distance = -1;
7155
7156 if (INSN_ADDRESSES_SET_P ())
7157 {
7158 unsigned long total;
7159
7160 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7161 distance = (total + insn_current_reference_address (insn));
7162 if (distance < total)
7163 distance = -1;
7164 }
7165
7166 /* Determine if this is a local call. */
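/* The first form matches a plain call pattern; the second matches a
call_value pattern, where the CALL is the source of a SET. */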
7167 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
7168 call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
7169 else
7170 call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
7171
7172 call_decl = SYMBOL_REF_DECL (call_dest);
7173 local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7174
7175 /* pc-relative branch. */
7176 if (!TARGET_LONG_CALLS
7177 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7178 || distance < 240000))
7179 length += 8;
7180
7181 /* 64-bit plabel sequence. */
7182 else if (TARGET_64BIT && !local_call)
7183 length += sibcall ? 28 : 24;
7184
7185 /* non-pic long absolute branch sequence. */
7186 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7187 length += 12;
7188
7189 /* long pc-relative branch sequence. */
7190 else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7191 || (TARGET_64BIT && !TARGET_GAS)
7192 || (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7193 {
7194 length += 20;
7195
7196 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
7197 length += 8;
7198 }
7199
7200 /* 32-bit plabel sequence. */
7201 else
7202 {
7203 length += 32;
7204
7205 if (TARGET_SOM)
7206 length += length_fp_args (insn);
7207
7208 if (flag_pic)
7209 length += 4;
7210
7211 if (!TARGET_PA_20)
7212 {
7213 if (!sibcall)
7214 length += 8;
7215
7216 if (!TARGET_NO_SPACE_REGS)
7217 length += 8;
7218 }
7219 }
7220
7221 return length;
7222 }
7223
7224 /* INSN is a function call. It may have an unconditional jump
7225 in its delay slot.
7226
7227 CALL_DEST is the routine we are calling. */
7228
7229 const char *
7230 output_call (rtx insn, rtx call_dest, int sibcall)
7231 {
7232 int delay_insn_deleted = 0;
7233 int delay_slot_filled = 0;
7234 int seq_length = dbr_sequence_length ();
7235 tree call_decl = SYMBOL_REF_DECL (call_dest);
7236 int local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7237 rtx xoperands[2];
7238
7239 xoperands[0] = call_dest;
7240
7241 /* Handle the common case where we're sure that the branch will reach
7242 the beginning of the "$CODE$" subspace. This is the beginning of
7243 the current function if we are in a named section. */
7244 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7245 {
7246 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7247 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7248 }
7249 else
7250 {
7251 if (TARGET_64BIT && !local_call)
7252 {
7253 /* ??? As far as I can tell, the HP linker doesn't support the
7254 long pc-relative sequence described in the 64-bit runtime
7255 architecture. So, we use a slightly longer indirect call. */
7256 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
7257
7258 xoperands[0] = p->internal_label;
7259 xoperands[1] = gen_label_rtx ();
7260
7261 /* If this isn't a sibcall, we put the load of %r27 into the
7262 delay slot. We can't do this in a sibcall as we don't
7263 have a second call-clobbered scratch register available. */
7264 if (seq_length != 0
7265 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7266 && !sibcall)
7267 {
7268 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7269 optimize, 0, 0, NULL);
7270
7271 /* Now delete the delay insn. */
7272 PUT_CODE (NEXT_INSN (insn), NOTE);
7273 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7274 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7275 delay_insn_deleted = 1;
7276 }
7277
7278 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7279 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7280 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7281
7282 if (sibcall)
7283 {
7284 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7285 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7286 output_asm_insn ("bve (%%r1)", xoperands);
7287 }
7288 else
7289 {
7290 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7291 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7292 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7293 delay_slot_filled = 1;
7294 }
7295 }
7296 else
7297 {
7298 int indirect_call = 0;
7299
7300 /* Emit a long call. There are several different sequences
7301 of increasing length and complexity. In most cases,
7302 they don't allow an instruction in the delay slot. */
7303 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7304 && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7305 && !(TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7306 && !TARGET_64BIT)
7307 indirect_call = 1;
7308
7309 if (seq_length != 0
7310 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7311 && !sibcall
7312 && (!TARGET_PA_20 || indirect_call))
7313 {
7314 /* A non-jump insn in the delay slot. By definition we can
7315 emit this insn before the call (and in fact before argument
7316 		     relocation).  */
7317 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0,
7318 NULL);
7319
7320 /* Now delete the delay insn. */
7321 PUT_CODE (NEXT_INSN (insn), NOTE);
7322 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7323 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7324 delay_insn_deleted = 1;
7325 }
7326
7327 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7328 {
7329 /* This is the best sequence for making long calls in
7330 non-pic code. Unfortunately, GNU ld doesn't provide
7331 the stub needed for external calls, and GAS's support
7332 for this with the SOM linker is buggy. It is safe
7333 to use this for local calls. */
7334 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7335 if (sibcall)
7336 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7337 else
7338 {
7339 if (TARGET_PA_20)
7340 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7341 xoperands);
7342 else
7343 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7344
7345 output_asm_insn ("copy %%r31,%%r2", xoperands);
7346 delay_slot_filled = 1;
7347 }
7348 }
7349 else
7350 {
7351 if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7352 || (TARGET_64BIT && !TARGET_GAS))
7353 {
7354 /* The HP assembler and linker can handle relocations
7355 for the difference of two symbols. GAS and the HP
7356 linker can't do this when one of the symbols is
7357 external. */
7358 xoperands[1] = gen_label_rtx ();
7359 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7360 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7361 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7362 CODE_LABEL_NUMBER (xoperands[1]));
7363 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7364 }
7365 else if (TARGET_GAS && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7366 {
7367 /* GAS currently can't generate the relocations that
7368 are needed for the SOM linker under HP-UX using this
7369 sequence. The GNU linker doesn't generate the stubs
7370 that are needed for external calls on TARGET_ELF32
7371 with this sequence. For now, we have to use a
7372 longer plabel sequence when using GAS. */
7373 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7374 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7375 xoperands);
7376 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7377 xoperands);
7378 }
7379 else
7380 {
7381 /* Emit a long plabel-based call sequence. This is
7382 essentially an inline implementation of $$dyncall.
7383 We don't actually try to call $$dyncall as this is
7384 as difficult as calling the function itself. */
7385 struct deferred_plabel *p = get_plabel (XSTR (call_dest, 0));
7386
7387 xoperands[0] = p->internal_label;
7388 xoperands[1] = gen_label_rtx ();
7389
7390 /* Since the call is indirect, FP arguments in registers
7391 need to be copied to the general registers. Then, the
7392 argument relocation stub will copy them back. */
7393 if (TARGET_SOM)
7394 copy_fp_args (insn);
7395
7396 if (flag_pic)
7397 {
7398 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7399 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7400 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7401 }
7402 else
7403 {
7404 output_asm_insn ("addil LR'%0-$global$,%%r27",
7405 xoperands);
7406 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7407 xoperands);
7408 }
7409
7410 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7411 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7412 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7413 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
7414
7415 if (!sibcall && !TARGET_PA_20)
7416 {
7417 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7418 if (TARGET_NO_SPACE_REGS)
7419 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7420 else
7421 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7422 }
7423 }
7424
7425 if (TARGET_PA_20)
7426 {
7427 if (sibcall)
7428 output_asm_insn ("bve (%%r1)", xoperands);
7429 else
7430 {
7431 if (indirect_call)
7432 {
7433 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7434 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7435 delay_slot_filled = 1;
7436 }
7437 else
7438 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7439 }
7440 }
7441 else
7442 {
7443 if (!TARGET_NO_SPACE_REGS)
7444 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7445 xoperands);
7446
7447 if (sibcall)
7448 {
7449 if (TARGET_NO_SPACE_REGS)
7450 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7451 else
7452 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7453 }
7454 else
7455 {
7456 if (TARGET_NO_SPACE_REGS)
7457 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7458 else
7459 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7460
7461 if (indirect_call)
7462 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7463 else
7464 output_asm_insn ("copy %%r31,%%r2", xoperands);
7465 delay_slot_filled = 1;
7466 }
7467 }
7468 }
7469 }
7470 }
7471
7472 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7473 output_asm_insn ("nop", xoperands);
7474
7475 /* We are done if there isn't a jump in the delay slot. */
7476 if (seq_length == 0
7477 || delay_insn_deleted
7478 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7479 return "";
7480
7481 /* A sibcall should never have a branch in the delay slot. */
7482 if (sibcall)
7483 abort ();
7484
7485 /* This call has an unconditional jump in its delay slot. */
7486 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7487
7488 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7489 {
7490 /* See if the return address can be adjusted. Use the containing
7491 sequence insn's address. */
7492 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7493 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7494 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7495
7496 if (VAL_14_BITS_P (distance))
7497 {
7498 xoperands[1] = gen_label_rtx ();
7499 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7500 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7501 CODE_LABEL_NUMBER (xoperands[1]));
7502 }
7503 else
7504 output_asm_insn ("nop\n\tb,n %0", xoperands);
7505 }
7506 else
7507 output_asm_insn ("b,n %0", xoperands);
7508
7509 /* Delete the jump. */
7510 PUT_CODE (NEXT_INSN (insn), NOTE);
7511 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7512 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7513
7514 return "";
7515 }
7516
7517 /* Return the attribute length of the indirect call instruction INSN.
7518    The length must match the code generated by output_indirect_call.
7519 The returned length includes the delay slot. Currently, the delay
7520 slot of an indirect call sequence is not exposed and it is used by
7521 the sequence itself. */
7522
7523 int
7524 attr_length_indirect_call (rtx insn)
7525 {
7526 unsigned long distance = -1;
7527 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7528
7529 if (INSN_ADDRESSES_SET_P ())
7530 {
7531 distance = (total + insn_current_reference_address (insn));
7532 if (distance < total)
7533 distance = -1;
7534 }
7535
7536 if (TARGET_64BIT)
7537 return 12;
7538
7539 if (TARGET_FAST_INDIRECT_CALLS
7540 || (!TARGET_PORTABLE_RUNTIME
7541 && ((TARGET_PA_20 && distance < 7600000) || distance < 240000)))
7542 return 8;
7543
7544 if (flag_pic)
7545 return 24;
7546
7547 if (TARGET_PORTABLE_RUNTIME)
7548 return 20;
7549
7550 /* Out of reach, can use ble. */
7551 return 12;
7552 }
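
/* Informal cross-reference: the lengths returned above correspond
   one-for-one to the sequences emitted by output_indirect_call below:
   8 bytes is the two-instruction $$dyncall branch, 12 bytes the
   ldil/ble/copy sequence, 20 bytes the five-instruction portable
   runtime sequence, and 24 bytes the long PIC sequence.  */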
7553
7554 const char *
7555 output_indirect_call (rtx insn, rtx call_dest)
7556 {
7557 rtx xoperands[1];
7558
7559 if (TARGET_64BIT)
7560 {
7561 xoperands[0] = call_dest;
7562 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7563 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7564 return "";
7565 }
7566
7567 /* First the special case for kernels, level 0 systems, etc. */
7568 if (TARGET_FAST_INDIRECT_CALLS)
7569 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7570
7571 /* Now the normal case -- we can reach $$dyncall directly or
7572 we're sure that we can get there via a long-branch stub.
7573
7574 No need to check target flags as the length uniquely identifies
7575 the remaining cases. */
7576 if (attr_length_indirect_call (insn) == 8)
7577 return ".CALL\tARGW0=GR\n\t{bl|b,l} $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7578
7579 /* Long millicode call, but we are not generating PIC or portable runtime
7580 code. */
7581 if (attr_length_indirect_call (insn) == 12)
7582 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7583
7584 /* Long millicode call for portable runtime. */
7585 if (attr_length_indirect_call (insn) == 20)
7586 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7587
7588 /* We need a long PIC call to $$dyncall. */
7589 xoperands[0] = NULL_RTX;
7590 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7591 if (TARGET_SOM || !TARGET_GAS)
7592 {
7593 xoperands[0] = gen_label_rtx ();
7594 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7595 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7596 CODE_LABEL_NUMBER (xoperands[0]));
7597 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7598 }
7599 else
7600 {
7601 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7602 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7603 xoperands);
7604 }
7605 output_asm_insn ("blr %%r0,%%r2", xoperands);
7606 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7607 return "";
7608 }
7609
7610 /* Return the total length of the save and restore instructions needed for
7611 the data linkage table pointer (i.e., the PIC register) across the call
7612 instruction INSN. No-return calls do not require a save and restore.
7613 In addition, we may be able to avoid the save and restore for calls
7614 within the same translation unit. */
7615
7616 int
7617 attr_length_save_restore_dltp (rtx insn)
7618 {
7619 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7620 return 0;
7621
7622 return 8;
7623 }
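
/* The 8 bytes returned above amount to two 4-byte instructions,
   presumably a store of the PIC register before the call and a load
   after it; the actual save and restore patterns live in pa.md.  */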
7624
7625 /* In HPUX 8.0's shared library scheme, special relocations are needed
7626 for function labels if they might be passed to a function
7627 in a shared library (because shared libraries don't live in code
7628 space), and special magic is needed to construct their address. */
7629
7630 void
7631 hppa_encode_label (rtx sym)
7632 {
7633 const char *str = XSTR (sym, 0);
7634 int len = strlen (str) + 1;
7635 char *newstr, *p;
7636
7637 p = newstr = alloca (len + 1);
7638 *p++ = '@';
7639 strcpy (p, str);
7640
7641 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7642 }
7643
7644 static void
7645 pa_encode_section_info (tree decl, rtx rtl, int first)
7646 {
7647 if (first && TEXT_SPACE_P (decl))
7648 {
7649 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7650 if (TREE_CODE (decl) == FUNCTION_DECL)
7651 hppa_encode_label (XEXP (rtl, 0));
7652 }
7653 }
7654
7655 /* This is sort of the inverse of pa_encode_section_info.  */
7656
7657 static const char *
7658 pa_strip_name_encoding (const char *str)
7659 {
7660 str += (*str == '@');
7661 str += (*str == '*');
7662 return str;
7663 }
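
/* Round-trip sketch (hypothetical symbol name, for illustration
   only): hppa_encode_label turns the symbol "foo" into "@foo", which
   is what FUNCTION_NAME_P tests for, and pa_strip_name_encoding
   recovers "foo".  The '*' case strips GCC's marker for names that
   are to be emitted verbatim.  */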
7664
7665 int
7666 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7667 {
7668 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7669 }
7670
7671 /* Returns 1 if OP is a function label involved in a simple addition
7672 with a constant. Used to keep certain patterns from matching
7673 during instruction combination. */
7674 int
7675 is_function_label_plus_const (rtx op)
7676 {
7677 /* Strip off any CONST. */
7678 if (GET_CODE (op) == CONST)
7679 op = XEXP (op, 0);
7680
7681 return (GET_CODE (op) == PLUS
7682 && function_label_operand (XEXP (op, 0), Pmode)
7683 && GET_CODE (XEXP (op, 1)) == CONST_INT);
7684 }
7685
7686 /* Output assembly code for a thunk to FUNCTION. */
7687
7688 static void
7689 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
7690 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
7691 tree function)
7692 {
7693 const char *fname = XSTR (XEXP (DECL_RTL (function), 0), 0);
7694 const char *tname = XSTR (XEXP (DECL_RTL (thunk_fndecl), 0), 0);
7695 int val_14 = VAL_14_BITS_P (delta);
7696 int nbytes = 0;
7697 static unsigned int current_thunk_number;
7698 char label[16];
7699
7700 ASM_OUTPUT_LABEL (file, tname);
7701 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
7702
7703 fname = (*targetm.strip_name_encoding) (fname);
7704 tname = (*targetm.strip_name_encoding) (tname);
7705
7706 /* Output the thunk. We know that the function is in the same
7707 translation unit (i.e., the same space) as the thunk, and that
7708 thunks are output after their method. Thus, we don't need an
7709 external branch to reach the function. With SOM and GAS,
7710 functions and thunks are effectively in different sections.
7711    Thus, we can always use an IA-relative branch and the linker
7712 will add a long branch stub if necessary.
7713
7714 However, we have to be careful when generating PIC code on the
7715 SOM port to ensure that the sequence does not transfer to an
7716 import stub for the target function as this could clobber the
7717 return value saved at SP-24. This would also apply to the
7718 32-bit linux port if the multi-space model is implemented. */
7719 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7720 && !(flag_pic && TREE_PUBLIC (function))
7721 && (TARGET_GAS || last_address < 262132))
7722 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
7723 && ((targetm.have_named_sections
7724 && DECL_SECTION_NAME (thunk_fndecl) != NULL
7725 /* The GNU 64-bit linker has rather poor stub management.
7726 So, we use a long branch from thunks that aren't in
7727 the same section as the target function. */
7728 && ((!TARGET_64BIT
7729 && (DECL_SECTION_NAME (thunk_fndecl)
7730 != DECL_SECTION_NAME (function)))
7731 || ((DECL_SECTION_NAME (thunk_fndecl)
7732 == DECL_SECTION_NAME (function))
7733 && last_address < 262132)))
7734 || (!targetm.have_named_sections && last_address < 262132))))
7735 {
7736 if (val_14)
7737 {
7738 fprintf (file, "\tb %s\n\tldo " HOST_WIDE_INT_PRINT_DEC
7739 "(%%r26),%%r26\n", fname, delta);
7740 nbytes += 8;
7741 }
7742 else
7743 {
7744 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7745 ",%%r26\n", delta);
7746 fprintf (file, "\tb %s\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7747 "(%%r1),%%r26\n", fname, delta);
7748 nbytes += 12;
7749 }
7750 }
7751 else if (TARGET_64BIT)
7752 {
7753 /* We only have one call-clobbered scratch register, so we can't
7754 make use of the delay slot if delta doesn't fit in 14 bits. */
7755 if (!val_14)
7756 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7757 ",%%r26\n\tldo R'" HOST_WIDE_INT_PRINT_DEC
7758 "(%%r1),%%r26\n", delta, delta);
7759
7760 fprintf (file, "\tb,l .+8,%%r1\n");
7761
7762 if (TARGET_GAS)
7763 {
7764 fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7765 fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r1\n", fname);
7766 }
7767 else
7768 {
7769 int off = val_14 ? 8 : 16;
7770 fprintf (file, "\taddil L'%s-%s-%d,%%r1\n", fname, tname, off);
7771 fprintf (file, "\tldo R'%s-%s-%d(%%r1),%%r1\n", fname, tname, off);
7772 }
7773
7774 if (val_14)
7775 {
7776 fprintf (file, "\tbv %%r0(%%r1)\n\tldo ");
7777 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7778 nbytes += 20;
7779 }
7780 else
7781 {
7782 fprintf (file, "\tbv,n %%r0(%%r1)\n");
7783 nbytes += 24;
7784 }
7785 }
7786 else if (TARGET_PORTABLE_RUNTIME)
7787 {
7788 fprintf (file, "\tldil L'%s,%%r1\n", fname);
7789 fprintf (file, "\tldo R'%s(%%r1),%%r22\n", fname);
7790
7791 if (val_14)
7792 {
7793 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7794 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7795 nbytes += 16;
7796 }
7797 else
7798 {
7799 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7800 ",%%r26\n", delta);
7801 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7802 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7803 nbytes += 20;
7804 }
7805 }
7806 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7807 {
7808 /* The function is accessible from outside this module. The only
7809 way to avoid an import stub between the thunk and function is to
7810 call the function directly with an indirect sequence similar to
7811 that used by $$dyncall. This is possible because $$dyncall acts
7812 as the import stub in an indirect call. */
7813 const char *lab;
7814
7815 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
7816 lab = (*targetm.strip_name_encoding) (label);
7817
7818 fprintf (file, "\taddil LT'%s,%%r19\n", lab);
7819 fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
7820 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7821 fprintf (file, "\tbb,>=,n %%r22,30,.+16\n");
7822 fprintf (file, "\tdepi 0,31,2,%%r22\n");
7823 fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
7824 fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
7825 if (!val_14)
7826 {
7827 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7828 ",%%r26\n", delta);
7829 nbytes += 4;
7830 }
7831 if (TARGET_PA_20)
7832 {
7833 fprintf (file, "\tbve (%%r22)\n\tldo ");
7834 nbytes += 36;
7835 }
7836 else
7837 {
7838 if (TARGET_NO_SPACE_REGS)
7839 {
7840 fprintf (file, "\tbe 0(%%sr4,%%r22)\n\tldo ");
7841 nbytes += 36;
7842 }
7843 else
7844 {
7845 fprintf (file, "\tldsid (%%sr0,%%r22),%%r21\n");
7846 fprintf (file, "\tmtsp %%r21,%%sr0\n");
7847 fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
7848 nbytes += 44;
7849 }
7850 }
7851
7852 if (val_14)
7853 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7854 else
7855 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7856 }
7857 else if (flag_pic)
7858 {
7859 if (TARGET_PA_20)
7860 fprintf (file, "\tb,l .+8,%%r1\n");
7861 else
7862 fprintf (file, "\tbl .+8,%%r1\n");
7863
7864 if (TARGET_SOM || !TARGET_GAS)
7865 {
7866 fprintf (file, "\taddil L'%s-%s-8,%%r1\n", fname, tname);
7867 fprintf (file, "\tldo R'%s-%s-8(%%r1),%%r22\n", fname, tname);
7868 }
7869 else
7870 {
7871 fprintf (file, "\taddil L'%s-$PIC_pcrel$0+4,%%r1\n", fname);
7872 fprintf (file, "\tldo R'%s-$PIC_pcrel$0+8(%%r1),%%r22\n", fname);
7873 }
7874
7875 if (val_14)
7876 {
7877 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7878 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7879 nbytes += 20;
7880 }
7881 else
7882 {
7883 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC
7884 ",%%r26\n", delta);
7885 fprintf (file, "\tbv %%r0(%%r22)\n\tldo ");
7886 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7887 nbytes += 24;
7888 }
7889 }
7890 else
7891 {
7892 if (!val_14)
7893 fprintf (file, "\taddil L'" HOST_WIDE_INT_PRINT_DEC ",%%r26\n", delta);
7894
7895 fprintf (file, "\tldil L'%s,%%r22\n", fname);
7896 fprintf (file, "\tbe R'%s(%%sr4,%%r22)\n\tldo ", fname);
7897
7898 if (val_14)
7899 {
7900 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%%r26),%%r26\n", delta);
7901 nbytes += 12;
7902 }
7903 else
7904 {
7905 fprintf (file, "R'" HOST_WIDE_INT_PRINT_DEC "(%%r1),%%r26\n", delta);
7906 nbytes += 16;
7907 }
7908 }
7909
7910 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
7911
7912 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
7913 {
7914 data_section ();
7915 fprintf (file, "\t.align 4\n");
7916 ASM_OUTPUT_LABEL (file, label);
7917 fprintf (file, "\t.word P'%s\n", fname);
7918 function_section (thunk_fndecl);
7919 }
7920
7921 current_thunk_number++;
7922 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
7923 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
7924 last_address += nbytes;
7925 update_total_code_bytes (nbytes);
7926 }
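
/* Sketch of the simplest thunk emitted above (assuming the branch
   reaches and DELTA fits in 14 bits; "target" and "delta" are
   placeholders):

	b target
	ldo delta(%r26),%r26

   The this pointer arrives in the first argument register, %r26, and
   is adjusted in the delay slot of the branch to the real function.  */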
7927
7928 /* Only direct calls to static functions are allowed to be sibling (tail)
7929 call optimized.
7930
7931 This restriction is necessary because some linker generated stubs will
7932 store return pointers into rp' in some cases which might clobber a
7933 live value already in rp'.
7934
7935 In a sibcall the current function and the target function share stack
7936 space. Thus if the path to the current function and the path to the
7937 target function save a value in rp', they save the value into the
7938 same stack slot, which has undesirable consequences.
7939
7940 Because of the deferred binding nature of shared libraries any function
7941 with external scope could be in a different load module and thus require
7942 rp' to be saved when calling that function. So sibcall optimizations
7943    can only be safe for static functions.
7944
7945 Note that GCC never needs return value relocations, so we don't have to
7946 worry about static calls with return value relocations (which require
7947 saving rp').
7948
7949 It is safe to perform a sibcall optimization when the target function
7950 will never return. */
7951 static bool
7952 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
7953 {
7954   /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
7955 single subspace mode and the call is not indirect. As far as I know,
7956 there is no operating system support for the multiple subspace mode.
7957 It might be possible to support indirect calls if we didn't use
7958 $$dyncall (see the indirect sequence generated in output_call). */
7959 if (TARGET_ELF32)
7960 return (decl != NULL_TREE);
7961
7962 /* Sibcalls are not ok because the arg pointer register is not a fixed
7963 register. This prevents the sibcall optimization from occurring. In
7964 addition, there are problems with stub placement using GNU ld. This
7965 is because a normal sibcall branch uses a 17-bit relocation while
7966 a regular call branch uses a 22-bit relocation. As a result, more
7967 care needs to be taken in the placement of long-branch stubs. */
7968 if (TARGET_64BIT)
7969 return false;
7970
7971 return (decl
7972 && !TARGET_PORTABLE_RUNTIME
7973 && !TREE_PUBLIC (decl));
7974 }
7975
7976 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
7977 use in fmpyadd instructions. */
7978 int
7979 fmpyaddoperands (rtx *operands)
7980 {
7981 enum machine_mode mode = GET_MODE (operands[0]);
7982
7983 /* Must be a floating point mode. */
7984 if (mode != SFmode && mode != DFmode)
7985 return 0;
7986
7987 /* All modes must be the same. */
7988 if (! (mode == GET_MODE (operands[1])
7989 && mode == GET_MODE (operands[2])
7990 && mode == GET_MODE (operands[3])
7991 && mode == GET_MODE (operands[4])
7992 && mode == GET_MODE (operands[5])))
7993 return 0;
7994
7995 /* All operands must be registers. */
7996 if (! (GET_CODE (operands[1]) == REG
7997 && GET_CODE (operands[2]) == REG
7998 && GET_CODE (operands[3]) == REG
7999 && GET_CODE (operands[4]) == REG
8000 && GET_CODE (operands[5]) == REG))
8001 return 0;
8002
8003 /* Only 2 real operands to the addition. One of the input operands must
8004 be the same as the output operand. */
8005 if (! rtx_equal_p (operands[3], operands[4])
8006 && ! rtx_equal_p (operands[3], operands[5]))
8007 return 0;
8008
8009   /* Inout operand of add cannot conflict with any operands from multiply.  */
8010 if (rtx_equal_p (operands[3], operands[0])
8011 || rtx_equal_p (operands[3], operands[1])
8012 || rtx_equal_p (operands[3], operands[2]))
8013 return 0;
8014
8015   /* Multiply cannot feed into addition operands.  */
8016 if (rtx_equal_p (operands[4], operands[0])
8017 || rtx_equal_p (operands[5], operands[0]))
8018 return 0;
8019
8020 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8021 if (mode == SFmode
8022 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8023 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8024 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8025 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8026 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8027 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8028 return 0;
8029
8030 /* Passed. Operands are suitable for fmpyadd. */
8031 return 1;
8032 }
8033
8034 #if !defined(USE_COLLECT2)
8035 static void
8036 pa_asm_out_constructor (rtx symbol, int priority)
8037 {
8038 if (!function_label_operand (symbol, VOIDmode))
8039 hppa_encode_label (symbol);
8040
8041 #ifdef CTORS_SECTION_ASM_OP
8042 default_ctor_section_asm_out_constructor (symbol, priority);
8043 #else
8044 # ifdef TARGET_ASM_NAMED_SECTION
8045 default_named_section_asm_out_constructor (symbol, priority);
8046 # else
8047 default_stabs_asm_out_constructor (symbol, priority);
8048 # endif
8049 #endif
8050 }
8051
8052 static void
8053 pa_asm_out_destructor (rtx symbol, int priority)
8054 {
8055 if (!function_label_operand (symbol, VOIDmode))
8056 hppa_encode_label (symbol);
8057
8058 #ifdef DTORS_SECTION_ASM_OP
8059 default_dtor_section_asm_out_destructor (symbol, priority);
8060 #else
8061 # ifdef TARGET_ASM_NAMED_SECTION
8062 default_named_section_asm_out_destructor (symbol, priority);
8063 # else
8064 default_stabs_asm_out_destructor (symbol, priority);
8065 # endif
8066 #endif
8067 }
8068 #endif
8069
8070 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8071 use in fmpysub instructions. */
8072 int
8073 fmpysuboperands (rtx *operands)
8074 {
8075 enum machine_mode mode = GET_MODE (operands[0]);
8076
8077 /* Must be a floating point mode. */
8078 if (mode != SFmode && mode != DFmode)
8079 return 0;
8080
8081 /* All modes must be the same. */
8082 if (! (mode == GET_MODE (operands[1])
8083 && mode == GET_MODE (operands[2])
8084 && mode == GET_MODE (operands[3])
8085 && mode == GET_MODE (operands[4])
8086 && mode == GET_MODE (operands[5])))
8087 return 0;
8088
8089 /* All operands must be registers. */
8090 if (! (GET_CODE (operands[1]) == REG
8091 && GET_CODE (operands[2]) == REG
8092 && GET_CODE (operands[3]) == REG
8093 && GET_CODE (operands[4]) == REG
8094 && GET_CODE (operands[5]) == REG))
8095 return 0;
8096
8097 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8098      operation, so operands[4] must be the same as operands[3].  */
8099 if (! rtx_equal_p (operands[3], operands[4]))
8100 return 0;
8101
8102   /* Multiply cannot feed into subtraction.  */
8103 if (rtx_equal_p (operands[5], operands[0]))
8104 return 0;
8105
8106 /* Inout operand of sub can not conflict with any operands from multiply. */
8107 if (rtx_equal_p (operands[3], operands[0])
8108 || rtx_equal_p (operands[3], operands[1])
8109 || rtx_equal_p (operands[3], operands[2]))
8110 return 0;
8111
8112 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8113 if (mode == SFmode
8114 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8115 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8116 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8117 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8118 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8119 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8120 return 0;
8121
8122 /* Passed. Operands are suitable for fmpysub. */
8123 return 1;
8124 }
8125
8126 int
8127 plus_xor_ior_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8128 {
8129 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
8130 || GET_CODE (op) == IOR);
8131 }
8132
8133 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8134 constants for shadd instructions. */
8135 static int
8136 shadd_constant_p (int val)
8137 {
8138 if (val == 2 || val == 4 || val == 8)
8139 return 1;
8140 else
8141 return 0;
8142 }
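
/* For reference (informal): the constants 2, 4 and 8 select the
   sh1add, sh2add and sh3add instructions, which compute (x << 1) + y,
   (x << 2) + y and (x << 3) + y respectively.  */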
8143
8144 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
8145    the valid constants for shadd instructions.  */
8146 int
8147 shadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8148 {
8149 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
8150 }
8151
8152 /* Return 1 if OP is valid as a base or index register in a
8153 REG+REG address. */
8154
8155 int
8156 borx_reg_operand (rtx op, enum machine_mode mode)
8157 {
8158 if (GET_CODE (op) != REG)
8159 return 0;
8160
8161 /* We must reject virtual registers as the only expressions that
8162 can be instantiated are REG and REG+CONST. */
8163 if (op == virtual_incoming_args_rtx
8164 || op == virtual_stack_vars_rtx
8165 || op == virtual_stack_dynamic_rtx
8166 || op == virtual_outgoing_args_rtx
8167 || op == virtual_cfa_rtx)
8168 return 0;
8169
8170 /* While it's always safe to index off the frame pointer, it's not
8171 profitable to do so when the frame pointer is being eliminated. */
8172 if (!reload_completed
8173 && flag_omit_frame_pointer
8174 && !current_function_calls_alloca
8175 && op == frame_pointer_rtx)
8176 return 0;
8177
8178 return register_operand (op, mode);
8179 }
8180
8181 /* Return 1 if this operand is anything other than a hard register. */
8182
8183 int
8184 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8185 {
8186 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8187 }
8188
8189 /* Return 1 if INSN branches forward. Should be using insn_addresses
8190 to avoid walking through all the insns... */
8191 static int
8192 forward_branch_p (rtx insn)
8193 {
8194 rtx label = JUMP_LABEL (insn);
8195
8196 while (insn)
8197 {
8198 if (insn == label)
8199 break;
8200 else
8201 insn = NEXT_INSN (insn);
8202 }
8203
8204 return (insn == label);
8205 }
8206
8207 /* Return 1 if OP is an equality comparison, else return 0. */
8208 int
8209 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8210 {
8211 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8212 }
8213
8214 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
8215 int
8216 movb_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8217 {
8218 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
8219 || GET_CODE (op) == LT || GET_CODE (op) == GE);
8220 }
8221
8222 /* Return 1 if INSN is in the delay slot of a call instruction. */
8223 int
8224 jump_in_call_delay (rtx insn)
8225 {
8227 if (GET_CODE (insn) != JUMP_INSN)
8228 return 0;
8229
8230 if (PREV_INSN (insn)
8231 && PREV_INSN (PREV_INSN (insn))
8232 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8233 {
8234 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8235
8236 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8237 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8238
8239 }
8240 else
8241 return 0;
8242 }
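
/* Sketch of the SEQUENCE layout assumed above: after delay branch
   scheduling, a call with a filled delay slot looks like
   (sequence [call_insn delay_insn]), so element 0 is the call itself
   and element 1 is the insn occupying its delay slot.  */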
8243
8244 /* Output an unconditional move and branch insn. */
8245
8246 const char *
8247 output_parallel_movb (rtx *operands, int length)
8248 {
8249 /* These are the cases in which we win. */
8250 if (length == 4)
8251 return "mov%I1b,tr %1,%0,%2";
8252
8253 /* None of these cases wins, but they don't lose either. */
8254 if (dbr_sequence_length () == 0)
8255 {
8256 /* Nothing in the delay slot, fake it by putting the combined
8257 insn (the copy or add) in the delay slot of a bl. */
8258 if (GET_CODE (operands[1]) == CONST_INT)
8259 return "b %2\n\tldi %1,%0";
8260 else
8261 return "b %2\n\tcopy %1,%0";
8262 }
8263 else
8264 {
8265 /* Something in the delay slot, but we've got a long branch. */
8266 if (GET_CODE (operands[1]) == CONST_INT)
8267 return "ldi %1,%0\n\tb %2";
8268 else
8269 return "copy %1,%0\n\tb %2";
8270 }
8271 }
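
/* Informal note: the %I1 output modifier prints an "i" when operand 1
   is a CONST_INT, so the winning case above assembles as "movb,tr"
   for a register source and "movib,tr" for a small constant source.
   The same trick selects between "addb" and "addib" below.  */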
8272
8273 /* Output an unconditional add and branch insn. */
8274
8275 const char *
8276 output_parallel_addb (rtx *operands, int length)
8277 {
8278 /* To make life easy we want operand0 to be the shared input/output
8279 operand and operand1 to be the readonly operand. */
8280 if (operands[0] == operands[1])
8281 operands[1] = operands[2];
8282
8283 /* These are the cases in which we win. */
8284 if (length == 4)
8285 return "add%I1b,tr %1,%0,%3";
8286
8287 /* None of these cases win, but they don't lose either. */
8288 if (dbr_sequence_length () == 0)
8289 {
8290 /* Nothing in the delay slot, fake it by putting the combined
8291 insn (the copy or add) in the delay slot of a bl. */
8292 return "b %3\n\tadd%I1 %1,%0,%0";
8293 }
8294 else
8295 {
8296 /* Something in the delay slot, but we've got a long branch. */
8297 return "add%I1 %1,%0,%0\n\tb %3";
8298 }
8299 }
8300
8301 /* Return nonzero if INSN (a jump insn) immediately follows a call
8302 to a named function. This is used to avoid filling the delay slot
8303 of the jump since it can usually be eliminated by modifying RP in
8304 the delay slot of the call. */
8305
8306 int
8307 following_call (rtx insn)
8308 {
8309 if (! TARGET_JUMP_IN_DELAY)
8310 return 0;
8311
8312 /* Find the previous real insn, skipping NOTEs. */
8313 insn = PREV_INSN (insn);
8314 while (insn && GET_CODE (insn) == NOTE)
8315 insn = PREV_INSN (insn);
8316
8317 /* Check for CALL_INSNs and millicode calls. */
8318 if (insn
8319 && ((GET_CODE (insn) == CALL_INSN
8320 && get_attr_type (insn) != TYPE_DYNCALL)
8321 || (GET_CODE (insn) == INSN
8322 && GET_CODE (PATTERN (insn)) != SEQUENCE
8323 && GET_CODE (PATTERN (insn)) != USE
8324 && GET_CODE (PATTERN (insn)) != CLOBBER
8325 && get_attr_type (insn) == TYPE_MILLI)))
8326 return 1;
8327
8328 return 0;
8329 }
8330
8331 /* We use this hook to perform a PA specific optimization which is difficult
8332 to do in earlier passes.
8333
8334 We want the delay slots of branches within jump tables to be filled.
8335 None of the compiler passes at the moment even has the notion that a
8336 PA jump table doesn't contain addresses, but instead contains actual
8337 instructions!
8338
8339 Because we actually jump into the table, the addresses of each entry
8340 must stay constant in relation to the beginning of the table (which
8341 itself must stay constant relative to the instruction to jump into
8342 it). I don't believe we can guarantee earlier passes of the compiler
8343 will adhere to those rules.
8344
8345 So, late in the compilation process we find all the jump tables, and
8346 expand them into real code -- eg each entry in the jump table vector
8347 will get an appropriate label followed by a jump to the final target.
8348
8349 Reorg and the final jump pass can then optimize these branches and
8350 fill their delay slots. We end up with smaller, more efficient code.
8351
8352 The jump instructions within the table are special; we must be able
8353 to identify them during assembly output (if the jumps don't get filled
8354    we need to emit a nop rather than nullifying the delay slot).  We
8355 identify jumps in switch tables by using insns with the attribute
8356 type TYPE_BTABLE_BRANCH.
8357
8358 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8359 insns. This serves two purposes, first it prevents jump.c from
8360 noticing that the last N entries in the table jump to the instruction
8361 immediately after the table and deleting the jumps. Second, those
8362 insns mark where we should emit .begin_brtab and .end_brtab directives
8363 when using GAS (allows for better link time optimizations). */
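
/* Sketch of the expansion performed below (labels are hypothetical):
   a two-entry table

	(addr_vec [L1 L2])

   becomes

	begin_brtab
   T1:	b L1
	barrier
   T2:	b L2
	barrier
	end_brtab

   so a jump into the table at T1 or T2 lands on a real branch whose
   delay slot reorg can later fill.  */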
8364
8365 static void
8366 pa_reorg (void)
8367 {
8368 rtx insn;
8369
8370 remove_useless_addtr_insns (1);
8371
8372 if (pa_cpu < PROCESSOR_8000)
8373 pa_combine_instructions ();
8374
8376 /* This is fairly cheap, so always run it if optimizing. */
8377 if (optimize > 0 && !TARGET_BIG_SWITCH)
8378 {
8379 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8380 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8381 {
8382 rtx pattern, tmp, location, label;
8383 unsigned int length, i;
8384
8385 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8386 if (GET_CODE (insn) != JUMP_INSN
8387 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8388 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8389 continue;
8390
8391 /* Emit marker for the beginning of the branch table. */
8392 emit_insn_before (gen_begin_brtab (), insn);
8393
8394 pattern = PATTERN (insn);
8395 location = PREV_INSN (insn);
8396 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8397
8398 for (i = 0; i < length; i++)
8399 {
8400 /* Emit a label before each jump to keep jump.c from
8401 removing this code. */
8402 tmp = gen_label_rtx ();
8403 LABEL_NUSES (tmp) = 1;
8404 emit_label_after (tmp, location);
8405 location = NEXT_INSN (location);
8406
8407 if (GET_CODE (pattern) == ADDR_VEC)
8408 label = XEXP (XVECEXP (pattern, 0, i), 0);
8409 else
8410 label = XEXP (XVECEXP (pattern, 1, i), 0);
8411
8412 tmp = gen_short_jump (label);
8413
8414 /* Emit the jump itself. */
8415 tmp = emit_jump_insn_after (tmp, location);
8416 JUMP_LABEL (tmp) = label;
8417 LABEL_NUSES (label)++;
8418 location = NEXT_INSN (location);
8419
8420 /* Emit a BARRIER after the jump. */
8421 emit_barrier_after (location);
8422 location = NEXT_INSN (location);
8423 }
8424
8425 /* Emit marker for the end of the branch table. */
8426 emit_insn_before (gen_end_brtab (), location);
8427 location = NEXT_INSN (location);
8428 emit_barrier_after (location);
8429
8430 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8431 delete_insn (insn);
8432 }
8433 }
8434 else
8435 {
8436 /* Still need brtab marker insns. FIXME: the presence of these
8437 markers disables output of the branch table to readonly memory,
8438 and any alignment directives that might be needed. Possibly,
8439 the begin_brtab insn should be output before the label for the
8440 table. This doesn't matter at the moment since the tables are
8441 always output in the text section. */
8442 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8443 {
8444 /* Find an ADDR_VEC insn. */
8445 if (GET_CODE (insn) != JUMP_INSN
8446 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8447 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8448 continue;
8449
8450 /* Now generate markers for the beginning and end of the
8451 branch table. */
8452 emit_insn_before (gen_begin_brtab (), insn);
8453 emit_insn_after (gen_end_brtab (), insn);
8454 }
8455 }
8456 }
8457
8458 /* The PA has a number of odd instructions which can perform multiple
8459 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8460 it may be profitable to combine two instructions into one instruction
8461    with two outputs.  It's not profitable on PA2.0 machines because the
8462 two outputs would take two slots in the reorder buffers.
8463
8464 This routine finds instructions which can be combined and combines
8465 them. We only support some of the potential combinations, and we
8466 only try common ways to find suitable instructions.
8467
8468 * addb can add two registers or a register and a small integer
8469 and jump to a nearby (+-8k) location. Normally the jump to the
8470 nearby location is conditional on the result of the add, but by
8471 using the "true" condition we can make the jump unconditional.
8472 Thus addb can perform two independent operations in one insn.
8473
8474 * movb is similar to addb in that it can perform a reg->reg
8475    or small immediate->reg copy and jump to a nearby (+-8k) location.
8476
8477 * fmpyadd and fmpysub can perform a FP multiply and either an
8478 FP add or FP sub if the operands of the multiply and add/sub are
8479 independent (there are other minor restrictions). Note both
8480 the fmpy and fadd/fsub can in theory move to better spots according
8481 to data dependencies, but for now we require the fmpy stay at a
8482 fixed location.
8483
8484 * Many of the memory operations can perform pre & post updates
8485 of index registers. GCC's pre/post increment/decrement addressing
8486 is far too simple to take advantage of all the possibilities. This
8487 pass may not be suitable since those insns may not be independent.
8488
8489 * comclr can compare two ints or an int and a register, nullify
8490 the following instruction and zero some other register. This
8491 is more difficult to use as it's harder to find an insn which
8492 will generate a comclr than finding something like an unconditional
8493 branch. (conditional moves & long branches create comclr insns).
8494
8495 * Most arithmetic operations can conditionally skip the next
8496 instruction. They can be viewed as "perform this operation
8497 and conditionally jump to this nearby location" (where nearby
8498    is an insn away).  These are difficult to use due to the
8499 branch length restrictions. */
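
/* A concrete sketch of the fmpy/fadd case handled below: when an fmpy
   and a nearby fadd have independent operands, their two SETs are
   wrapped into a single PARALLEL which recog can match as an fmpyadd
   pattern; fmpyaddoperands above is what validates the six operands.  */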
8500
8501 static void
8502 pa_combine_instructions (void)
8503 {
8504 rtx anchor, new;
8505
8506 /* This can get expensive since the basic algorithm is on the
8507 order of O(n^2) (or worse). Only do it for -O2 or higher
8508 levels of optimization. */
8509 if (optimize < 2)
8510 return;
8511
8512 /* Walk down the list of insns looking for "anchor" insns which
8513 may be combined with "floating" insns. As the name implies,
8514 "anchor" instructions don't move, while "floating" insns may
8515 move around. */
8516 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8517 new = make_insn_raw (new);
8518
8519 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8520 {
8521 enum attr_pa_combine_type anchor_attr;
8522 enum attr_pa_combine_type floater_attr;
8523
8524 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8525 Also ignore any special USE insns. */
8526 if ((GET_CODE (anchor) != INSN
8527 && GET_CODE (anchor) != JUMP_INSN
8528 && GET_CODE (anchor) != CALL_INSN)
8529 || GET_CODE (PATTERN (anchor)) == USE
8530 || GET_CODE (PATTERN (anchor)) == CLOBBER
8531 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
8532 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
8533 continue;
8534
8535 anchor_attr = get_attr_pa_combine_type (anchor);
8536 /* See if anchor is an insn suitable for combination. */
8537 if (anchor_attr == PA_COMBINE_TYPE_FMPY
8538 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8539 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8540 && ! forward_branch_p (anchor)))
8541 {
8542 rtx floater;
8543
8544 for (floater = PREV_INSN (anchor);
8545 floater;
8546 floater = PREV_INSN (floater))
8547 {
8548 if (GET_CODE (floater) == NOTE
8549 || (GET_CODE (floater) == INSN
8550 && (GET_CODE (PATTERN (floater)) == USE
8551 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8552 continue;
8553
8554 /* Anything except a regular INSN will stop our search. */
8555 if (GET_CODE (floater) != INSN
8556 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8557 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8558 {
8559 floater = NULL_RTX;
8560 break;
8561 }
8562
8563 /* See if FLOATER is suitable for combination with the
8564 anchor. */
8565 floater_attr = get_attr_pa_combine_type (floater);
8566 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8567 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8568 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8569 && floater_attr == PA_COMBINE_TYPE_FMPY))
8570 {
8571 /* If ANCHOR and FLOATER can be combined, then we're
8572 done with this pass. */
8573 if (pa_can_combine_p (new, anchor, floater, 0,
8574 SET_DEST (PATTERN (floater)),
8575 XEXP (SET_SRC (PATTERN (floater)), 0),
8576 XEXP (SET_SRC (PATTERN (floater)), 1)))
8577 break;
8578 }
8579
8580 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8581 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
8582 {
8583 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
8584 {
8585 if (pa_can_combine_p (new, anchor, floater, 0,
8586 SET_DEST (PATTERN (floater)),
8587 XEXP (SET_SRC (PATTERN (floater)), 0),
8588 XEXP (SET_SRC (PATTERN (floater)), 1)))
8589 break;
8590 }
8591 else
8592 {
8593 if (pa_can_combine_p (new, anchor, floater, 0,
8594 SET_DEST (PATTERN (floater)),
8595 SET_SRC (PATTERN (floater)),
8596 SET_SRC (PATTERN (floater))))
8597 break;
8598 }
8599 }
8600 }
8601
8602 /* If we didn't find anything on the backwards scan try forwards. */
8603 if (!floater
8604 && (anchor_attr == PA_COMBINE_TYPE_FMPY
8605 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
8606 {
8607 for (floater = anchor; floater; floater = NEXT_INSN (floater))
8608 {
8609 if (GET_CODE (floater) == NOTE
8610 || (GET_CODE (floater) == INSN
8611 && (GET_CODE (PATTERN (floater)) == USE
8612 || GET_CODE (PATTERN (floater)) == CLOBBER)))
8614 continue;
8615
8616 /* Anything except a regular INSN will stop our search. */
8617 if (GET_CODE (floater) != INSN
8618 || GET_CODE (PATTERN (floater)) == ADDR_VEC
8619 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
8620 {
8621 floater = NULL_RTX;
8622 break;
8623 }
8624
8625 /* See if FLOATER is suitable for combination with the
8626 anchor. */
8627 floater_attr = get_attr_pa_combine_type (floater);
8628 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
8629 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
8630 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8631 && floater_attr == PA_COMBINE_TYPE_FMPY))
8632 {
8633 /* If ANCHOR and FLOATER can be combined, then we're
8634 done with this pass. */
8635 if (pa_can_combine_p (new, anchor, floater, 1,
8636 SET_DEST (PATTERN (floater)),
8637 XEXP (SET_SRC (PATTERN (floater)),
8638 0),
8639 XEXP (SET_SRC (PATTERN (floater)),
8640 1)))
8641 break;
8642 }
8643 }
8644 }
8645
8646 /* FLOATER will be nonzero if we found a suitable floating
8647 insn for combination with ANCHOR. */
8648 if (floater
8649 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
8650 || anchor_attr == PA_COMBINE_TYPE_FMPY))
8651 {
8652 /* Emit the new instruction and delete the old anchor. */
8653 emit_insn_before (gen_rtx_PARALLEL
8654 (VOIDmode,
8655 gen_rtvec (2, PATTERN (anchor),
8656 PATTERN (floater))),
8657 anchor);
8658
8659 PUT_CODE (anchor, NOTE);
8660 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8661 NOTE_SOURCE_FILE (anchor) = 0;
8662
8663 /* Emit a special USE insn for FLOATER, then delete
8664 the floating insn. */
8665 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8666 delete_insn (floater);
8667
8668 continue;
8669 }
8670 else if (floater
8671 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
8672 {
8673 rtx temp;
8674 /* Emit the new_jump instruction and delete the old anchor. */
8675 temp
8676 = emit_jump_insn_before (gen_rtx_PARALLEL
8677 (VOIDmode,
8678 gen_rtvec (2, PATTERN (anchor),
8679 PATTERN (floater))),
8680 anchor);
8681
8682 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
8683 PUT_CODE (anchor, NOTE);
8684 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
8685 NOTE_SOURCE_FILE (anchor) = 0;
8686
8687 /* Emit a special USE insn for FLOATER, then delete
8688 the floating insn. */
8689 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
8690 delete_insn (floater);
8691 continue;
8692 }
8693 }
8694 }
8695 }
8696
8697 static int
8698 pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
8699 rtx src1, rtx src2)
8700 {
8701 int insn_code_number;
8702 rtx start, end;
8703
8704 /* Create a PARALLEL with the patterns of ANCHOR and
8705 FLOATER, try to recognize it, then test constraints
8706 for the resulting pattern.
8707
8708 If the pattern doesn't match or the constraints
8709 aren't met keep searching for a suitable floater
8710 insn. */
8711 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8712 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
8713 INSN_CODE (new) = -1;
8714 insn_code_number = recog_memoized (new);
8715 if (insn_code_number < 0
8716 || (extract_insn (new), ! constrain_operands (1)))
8717 return 0;
8718
8719 if (reversed)
8720 {
8721 start = anchor;
8722 end = floater;
8723 }
8724 else
8725 {
8726 start = floater;
8727 end = anchor;
8728 }
8729
8730   /* There are up to three operands to consider: one
8731      output and two inputs.
8732
8733 The output must not be used between FLOATER & ANCHOR
8734 exclusive. The inputs must not be set between
8735 FLOATER and ANCHOR exclusive. */
8736
8737 if (reg_used_between_p (dest, start, end))
8738 return 0;
8739
8740 if (reg_set_between_p (src1, start, end))
8741 return 0;
8742
8743 if (reg_set_between_p (src2, start, end))
8744 return 0;
8745
8746 /* If we get here, then everything is good. */
8747 return 1;
8748 }
8749
8750 /* Return nonzero if references for INSN are delayed.
8751
8752 Millicode insns are actually function calls with some special
8753 constraints on arguments and register usage.
8754
8755 Millicode calls always expect their arguments in the integer argument
8756 registers, and always return their result in %r29 (ret1). They
8757 are expected to clobber their arguments, %r1, %r29, and the return
8758 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
8759
8760 This function tells reorg that the references to arguments and
8761 millicode calls do not appear to happen until after the millicode call.
8762 This allows reorg to put insns which set the argument registers into the
8763 delay slot of the millicode call -- thus they act more like traditional
8764 CALL_INSNs.
8765
8766 Note we can not consider side effects of the insn to be delayed because
8767 the branch and link insn will clobber the return pointer. If we happened
8768 to use the return pointer in the delay slot of the call, then we lose.
8769
8770 get_attr_type will try to recognize the given insn, so make sure to
8771 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
8772 in particular. */
8773 int
8774 insn_refs_are_delayed (rtx insn)
8775 {
8776 return ((GET_CODE (insn) == INSN
8777 && GET_CODE (PATTERN (insn)) != SEQUENCE
8778 && GET_CODE (PATTERN (insn)) != USE
8779 && GET_CODE (PATTERN (insn)) != CLOBBER
8780 && get_attr_type (insn) == TYPE_MILLI));
8781 }
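
/* For example (informal): the integer multiply, divide and remainder
   support routines $$mulI, $$divI and $$remI are reached through
   millicode calls of this kind.  */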
8782
8783 /* On the HP-PA the value is found in register(s) 28(-29), unless
8784 the mode is SF or DF. Then the value is returned in fr4 (32).
8785
8786 This must perform the same promotions as PROMOTE_MODE, else
8787 TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.
8788
8789 Small structures must be returned in a PARALLEL on PA64 in order
8790 to match the HP Compiler ABI. */
8791
8792 rtx
8793 function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
8794 {
8795 enum machine_mode valmode;
8796
8797 /* Aggregates with a size less than or equal to 128 bits are returned
8798 in GR 28(-29). They are left justified. The pad bits are undefined.
8799 Larger aggregates are returned in memory. */
8800 if (TARGET_64BIT && AGGREGATE_TYPE_P (valtype))
8801 {
8802 rtx loc[2];
8803 int i, offset = 0;
8804 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
8805
8806 for (i = 0; i < ub; i++)
8807 {
8808 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8809 gen_rtx_REG (DImode, 28 + i),
8810 GEN_INT (offset));
8811 offset += 8;
8812 }
8813
8814 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
8815 }
8816
8817 if ((INTEGRAL_TYPE_P (valtype)
8818 && TYPE_PRECISION (valtype) < BITS_PER_WORD)
8819 || POINTER_TYPE_P (valtype))
8820 valmode = word_mode;
8821 else
8822 valmode = TYPE_MODE (valtype);
8823
8824 if (TREE_CODE (valtype) == REAL_TYPE
8825 && TYPE_MODE (valtype) != TFmode
8826 && !TARGET_SOFT_FLOAT)
8827 return gen_rtx_REG (valmode, 32);
8828
8829 return gen_rtx_REG (valmode, 28);
8830 }
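
/* Worked example (illustrative): on PA64 a 16-byte aggregate comes
   back from the loop above as

	(parallel [(expr_list (reg:DI 28) (const_int 0))
		   (expr_list (reg:DI 29) (const_int 8))])

   i.e., left justified in GRs 28 and 29 as described.  */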
8831
8832 /* Return the location of a parameter that is passed in a register or NULL
8833 if the parameter has any component that is passed in memory.
8834
8835    This is new code and will be pushed into the net sources after
8836 further testing.
8837
8838 ??? We might want to restructure this so that it looks more like other
8839 ports. */
8840 rtx
8841 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
8842 int named ATTRIBUTE_UNUSED)
8843 {
8844 int max_arg_words = (TARGET_64BIT ? 8 : 4);
8845 int alignment = 0;
8846 int arg_size;
8847 int fpr_reg_base;
8848 int gpr_reg_base;
8849 rtx retval;
8850
8851 if (mode == VOIDmode)
8852 return NULL_RTX;
8853
8854 arg_size = FUNCTION_ARG_SIZE (mode, type);
8855
8856 /* If this arg would be passed partially or totally on the stack, then
8857 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
8858 handle arguments which are split between regs and stack slots if
8859 the ABI mandates split arguments. */
8860 if (! TARGET_64BIT)
8861 {
8862 /* The 32-bit ABI does not split arguments. */
8863 if (cum->words + arg_size > max_arg_words)
8864 return NULL_RTX;
8865 }
8866 else
8867 {
8868 if (arg_size > 1)
8869 alignment = cum->words & 1;
8870 if (cum->words + alignment >= max_arg_words)
8871 return NULL_RTX;
8872 }
8873
8874 /* The 32bit ABIs and the 64bit ABIs are rather different,
8875 particularly in their handling of FP registers. We might
8876 be able to cleverly share code between them, but I'm not
8877 going to bother in the hope that splitting them up results
8878 in code that is more easily understood. */
8879
8880 if (TARGET_64BIT)
8881 {
8882 /* Advance the base registers to their current locations.
8883
8884 Remember, gprs grow towards smaller register numbers while
8885 	 fprs grow towards higher register numbers.  Also remember that
8886 although FP regs are 32-bit addressable, we pretend that
8887 the registers are 64-bits wide. */
8888 gpr_reg_base = 26 - cum->words;
8889 fpr_reg_base = 32 + cum->words;
8890
8891 /* Arguments wider than one word and small aggregates need special
8892 treatment. */
8893 if (arg_size > 1
8894 || mode == BLKmode
8895 || (type && AGGREGATE_TYPE_P (type)))
8896 {
8897 /* Double-extended precision (80-bit), quad-precision (128-bit)
8898 and aggregates including complex numbers are aligned on
8899 128-bit boundaries. The first eight 64-bit argument slots
8900 are associated one-to-one, with general registers r26
8901 through r19, and also with floating-point registers fr4
8902 through fr11. Arguments larger than one word are always
8903 passed in general registers.
8904
8905 Using a PARALLEL with a word mode register results in left
8906 justified data on a big-endian target. */
8907
8908 rtx loc[8];
8909 int i, offset = 0, ub = arg_size;
8910
8911 /* Align the base register. */
8912 gpr_reg_base -= alignment;
8913
8914 ub = MIN (ub, max_arg_words - cum->words - alignment);
8915 for (i = 0; i < ub; i++)
8916 {
8917 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
8918 gen_rtx_REG (DImode, gpr_reg_base),
8919 GEN_INT (offset));
8920 gpr_reg_base -= 1;
8921 offset += 8;
8922 }
8923
8924 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
8925 }
8926 }
8927 else
8928 {
8929 /* If the argument is larger than a word, then we know precisely
8930 which registers we must use. */
8931 if (arg_size > 1)
8932 {
8933 if (cum->words)
8934 {
8935 gpr_reg_base = 23;
8936 fpr_reg_base = 38;
8937 }
8938 else
8939 {
8940 gpr_reg_base = 25;
8941 fpr_reg_base = 34;
8942 }
8943
8944 /* Structures 5 to 8 bytes in size are passed in the general
8945 registers in the same manner as other non-floating-point
8946 objects. The data is right-justified and zero-extended
8947 to 64 bits.
8948
8949 This is magic. Normally, using a PARALLEL results in left
8950 justified data on a big-endian target. However, using a
8951 single double-word register provides the required right
8952 justification for 5 to 8 byte structures. This has nothing
8953 to do with the direction of padding specified for the argument.
8954 It has to do with how the data is widened and shifted into
8955 and from the register.
8956
8957 Aside from adding load_multiple and store_multiple patterns,
8958 this is the only way that I have found to obtain right
8959 justification of BLKmode data when it has a size greater
8960 than one word. Splitting the operation into two SImode loads
8961 or returning a DImode REG results in left justified data. */
8962 if (mode == BLKmode)
8963 {
8964 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
8965 gen_rtx_REG (DImode, gpr_reg_base),
8966 const0_rtx);
8967 return gen_rtx_PARALLEL (mode, gen_rtvec (1, loc));
8968 }
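
/* Editor's worked example (not in the original source): a 6-byte
   BLKmode struct arriving first (cum->words == 0) gets
   gpr_reg_base == 25, and the code above returns

     (parallel:BLK [(expr_list (reg:DI 25) (const_int 0))])

   which yields the right-justified placement the comment describes.  */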
8969 }
8970 else
8971 {
8972 /* We have a single word (32 bits). A simple computation
8973 will get us the register #s we need. */
8974 gpr_reg_base = 26 - cum->words;
8975 fpr_reg_base = 32 + 2 * cum->words;
8976 }
8977 }
8978
8979 /* Determine if the argument needs to be passed in both general and
8980 floating point registers. */
8981 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
8982 /* If we are doing soft-float with portable runtime, then there
8983 is no need to worry about FP regs. */
8984 && !TARGET_SOFT_FLOAT
8985 /* The parameter must be some kind of float, else we can just
8986 pass it in integer registers. */
8987 && FLOAT_MODE_P (mode)
8988 /* The target function must not have a prototype. */
8989 && cum->nargs_prototype <= 0
8990 /* libcalls do not need to pass items in both FP and general
8991 registers. */
8992 && type != NULL_TREE
8993 /* All this hair applies to "outgoing" args only. This includes
8994 sibcall arguments set up with FUNCTION_INCOMING_ARG. */
8995 && !cum->incoming)
8996 /* Also pass outgoing floating arguments in both registers in indirect
8997 calls with the 32-bit ABI and the HP assembler since there is no
8998 way to specify argument locations in static functions. */
8999 || (!TARGET_64BIT
9000 && !TARGET_GAS
9001 && !cum->incoming
9002 && cum->indirect
9003 && FLOAT_MODE_P (mode)))
9004 {
9005 retval
9006 = gen_rtx_PARALLEL
9007 (mode,
9008 gen_rtvec (2,
9009 gen_rtx_EXPR_LIST (VOIDmode,
9010 gen_rtx_REG (mode, fpr_reg_base),
9011 const0_rtx),
9012 gen_rtx_EXPR_LIST (VOIDmode,
9013 gen_rtx_REG (mode, gpr_reg_base),
9014 const0_rtx)));
9015 }
9016 else
9017 {
9018 /* See if we should pass this parameter in a general register. */
9019 if (TARGET_SOFT_FLOAT
9020 /* Indirect calls in the normal 32-bit ABI require all arguments
9021 to be passed in general registers. */
9022 || (!TARGET_PORTABLE_RUNTIME
9023 && !TARGET_64BIT
9024 && !TARGET_ELF32
9025 && cum->indirect)
9026 /* If the parameter is not a floating point parameter, then
9027 it belongs in GPRs. */
9028 || !FLOAT_MODE_P (mode))
9029 retval = gen_rtx_REG (mode, gpr_reg_base);
9030 else
9031 retval = gen_rtx_REG (mode, fpr_reg_base);
9032 }
9033 return retval;
9034 }
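
/* Editor's worked example of the dual-register case in function_arg (a
   sketch, assuming an outgoing unprototyped DFmode argument in the
   first slot on a 32-bit ELF target): arg_size == 2 and cum->words == 0
   give gpr_reg_base == 25 and fpr_reg_base == 34, so the value is
   passed redundantly as

     (parallel:DF [(expr_list (reg:DF 34) (const_int 0))
                   (expr_list (reg:DF 25) (const_int 0))])

   and the callee may read it from either register file.  */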
9035
9036
9037 /* If this arg would be passed totally in registers or totally on the stack,
9038 then this routine should return zero. It is currently called only for
9039 the 64-bit target. */
9040 int
9041 function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9042 tree type, int named ATTRIBUTE_UNUSED)
9043 {
9044 unsigned int max_arg_words = 8;
9045 unsigned int offset = 0;
9046
9047 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9048 offset = 1;
9049
9050 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9051 /* Arg fits fully into registers. */
9052 return 0;
9053 else if (cum->words + offset >= max_arg_words)
9054 /* Arg fully on the stack. */
9055 return 0;
9056 else
9057 /* Arg is split. */
9058 return max_arg_words - cum->words - offset;
9059 }
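
/* Editor's worked example for function_arg_partial_nregs (not in the
   original source): a three-word argument with cum->words == 6 gets
   offset == 0; since 6 + 0 + 3 > 8 and 6 + 0 < 8 the argument is
   split, and 8 - 6 - 0 == 2 words are passed in registers with the
   remaining word on the stack.  */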
9060
9061
9062 /* Return 1 if OP is a comparison operator accepted by the cmpib patterns.
9063 This allows the use of MATCH_OPERATOR to recognize all the branch insns. */
9064
9065 int
9066 cmpib_comparison_operator (rtx op, enum machine_mode mode)
9067 {
9068 return ((mode == VOIDmode || GET_MODE (op) == mode)
9069 && (GET_CODE (op) == EQ
9070 || GET_CODE (op) == NE
9071 || GET_CODE (op) == GT
9072 || GET_CODE (op) == GTU
9073 || GET_CODE (op) == GE
9074 || GET_CODE (op) == LT
9075 || GET_CODE (op) == LE
9076 || GET_CODE (op) == LEU));
9077 }
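
/* Editor's sketch of how such a predicate is typically referenced from
   the machine description (illustrative only; the real patterns live
   in pa.md):

     (match_operator 0 "cmpib_comparison_operator"
        [(match_operand 1 "" "") (match_operand 2 "" "")])

   Note that GEU and LTU do not appear in the list accepted above.  */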
9078
9079 /* On hpux10, the linker will give an error if we have a reference
9080 in the read-only data section to a symbol defined in a shared
9081 library. Therefore, expressions that might require a reloc
9082 cannot be placed in the read-only data section. */
9083
9084 static void
9085 pa_select_section (tree exp, int reloc,
9086 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9087 {
9088 if (TREE_CODE (exp) == VAR_DECL
9089 && TREE_READONLY (exp)
9090 && !TREE_THIS_VOLATILE (exp)
9091 && DECL_INITIAL (exp)
9092 && (DECL_INITIAL (exp) == error_mark_node
9093 || TREE_CONSTANT (DECL_INITIAL (exp)))
9094 && !reloc)
9095 readonly_data_section ();
9096 else if (TREE_CODE_CLASS (TREE_CODE (exp)) == 'c'
9097 && !reloc)
9098 readonly_data_section ();
9099 else
9100 data_section ();
9101 }
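
/* Editor's illustration of the section choice above (a sketch, not
   from the original source):

     static const int answer = 42;           - constant initializer and
                                               no reloc: read-only data
     static const char *const msg = ext_str; - the initializer needs a
                                               reloc: writable data

   where ext_str stands for a hypothetical symbol that might be defined
   in a shared library.  */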
9102
9103 static void
9104 pa_globalize_label (FILE *stream, const char *name)
9105 {
9106 /* We only handle DATA objects here; functions are globalized in
9107 ASM_DECLARE_FUNCTION_NAME. */
9108 if (! FUNCTION_NAME_P (name))
9109 {
9110 fputs ("\t.EXPORT ", stream);
9111 assemble_name (stream, name);
9112 fputs (",DATA\n", stream);
9113 }
9114 }
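
/* Editor's example of the directive emitted above (derived directly
   from the code): for a global data object named "counter" this writes

       .EXPORT counter,DATA

   to the assembly stream; functions are instead exported through
   ASM_DECLARE_FUNCTION_NAME.  */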
9115
9116 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9117
9118 static rtx
9119 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9120 int incoming ATTRIBUTE_UNUSED)
9121 {
9122 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9123 }
9124
9125 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9126
9127 bool
9128 pa_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
9129 {
9130 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9131 PA64 ABI says that objects larger than 128 bits are returned in memory.
9132 Note that int_size_in_bytes can return -1 if the size of the object is
9133 variable or larger than the maximum value that can be expressed as
9134 a HOST_WIDE_INT. It can also return zero for an empty type. The
9135 simplest way to handle variable and empty types is to pass them in
9136 memory. This avoids problems in defining the boundaries of argument
9137 slots, allocating registers, etc. */
9138 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9139 || int_size_in_bytes (type) <= 0);
9140 }
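
/* Editor's worked examples for pa_return_in_memory (not in the
   original source): on a 32-bit target a 12-byte struct (12 > 8) is
   returned in memory while an 8-byte struct is returned in registers;
   on the 64-bit target the cutoff is 16 bytes.  Variable-sized and
   empty types also go in memory, as the comment above explains.  */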
9141
9142 #include "gt-pa.h"