pa.c (hppa_init_pic_save): Set rtx_unchanging for PIC_OFFSET_TABLE_SAVE_RTX.
/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "reload.h"
#include "expr.h"
#include "c-tree.h"
#include "function.h"
#include "obstack.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "tm_p.h"

#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_init_machine_status PARAMS ((struct function *));
static void pa_mark_machine_status PARAMS ((struct function *));
static void pa_free_machine_status PARAMS ((struct function *));
static void pa_combine_instructions PARAMS ((rtx));
static int pa_can_combine_p PARAMS ((rtx, rtx, rtx, int, rtx, rtx, rtx));
static int forward_branch_p PARAMS ((rtx));
static int shadd_constant_p PARAMS ((int));
static void pa_add_gc_roots PARAMS ((void));
static void mark_deferred_plabels PARAMS ((void *));
static void compute_zdepwi_operands PARAMS ((unsigned HOST_WIDE_INT, unsigned *));
static int compute_movstrsi_length PARAMS ((rtx));
static void remove_useless_addtr_insns PARAMS ((rtx, int));
static rtx store_reg PARAMS ((int, int, int));
static rtx load_reg PARAMS ((int, int, int));
static rtx set_reg_plus_d PARAMS ((int, int, int));

/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */

rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu;

/* String to hold which cpu we are scheduling for.  */
const char *pa_cpu_string;

/* Which architecture we are generating code for.  */
enum architecture_type pa_arch;

/* String to hold which architecture we are generating code for.  */
const char *pa_arch_string;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

static rtx find_addr_reg PARAMS ((rtx));

/* Keep track of the number of bytes we have output in the CODE subspaces
   during this compilation so we'll know when to emit inline long-calls.  */

unsigned int total_code_bytes;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */

struct deferred_plabel
{
  rtx internal_label;
  char *name;
} *deferred_plabels = 0;
int n_deferred_plabels = 0;

void
override_options ()
{
  /* Default to 7100LC scheduling.  */
  if (pa_cpu_string && ! strcmp (pa_cpu_string, "7100"))
    {
      pa_cpu_string = "7100";
      pa_cpu = PROCESSOR_7100;
    }
  else if (pa_cpu_string && ! strcmp (pa_cpu_string, "700"))
    {
      pa_cpu_string = "700";
      pa_cpu = PROCESSOR_700;
    }
  else if (pa_cpu_string == NULL
	   || ! strcmp (pa_cpu_string, "7100LC"))
    {
      pa_cpu_string = "7100LC";
      pa_cpu = PROCESSOR_7100LC;
    }
  else if (pa_cpu_string && ! strcmp (pa_cpu_string, "7200"))
    {
      pa_cpu_string = "7200";
      pa_cpu = PROCESSOR_7200;
    }
  else if (pa_cpu_string && ! strcmp (pa_cpu_string, "8000"))
    {
      pa_cpu_string = "8000";
      pa_cpu = PROCESSOR_8000;
    }
  else
    {
      warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, and 8000\n", pa_cpu_string);
    }

  /* Set the instruction set architecture.  */
  if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
    {
      pa_arch_string = "1.0";
      pa_arch = ARCHITECTURE_10;
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
    {
      pa_arch_string = "1.1";
      pa_arch = ARCHITECTURE_11;
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
    {
      pa_arch_string = "2.0";
      pa_arch = ARCHITECTURE_20;
      target_flags |= MASK_PA_11 | MASK_PA_20;
    }
  else if (pa_arch_string)
    {
      warning ("Unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
    }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning ("PIC code generation is not supported in the portable runtime model\n");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning ("PIC code generation is not compatible with fast indirect calls\n");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning ("-g is only supported when using GAS on this processor,");
      warning ("-g option disabled.");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Register global variables with the garbage collector.  */
  pa_add_gc_roots ();

  /* Arrange to save and restore machine status around nested functions.  */
  init_machine_status = pa_init_machine_status;
  mark_machine_status = pa_mark_machine_status;
  free_machine_status = pa_free_machine_status;
}

/* Functions to initialize pic_offset_table_save_rtx.
   These will be called, via pointer variables,
   from push_function_context and pop_function_context.  */

static void
pa_init_machine_status (p)
     struct function *p;
{
  p->machine = (machine_function *) xmalloc (sizeof (machine_function));

  p->machine->pic_offset_table_save_rtx = NULL_RTX;
}

static void
pa_mark_machine_status (p)
     struct function *p;
{
  if (p->machine)
    ggc_mark_rtx (p->machine->pic_offset_table_save_rtx);
}

static void
pa_free_machine_status (p)
     struct function *p;
{
  if (p->machine == NULL)
    return;

  free (p->machine);
  p->machine = NULL;
}


/* Return non-zero only if OP is a register of mode MODE,
   or CONST0_RTX.  */
int
reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == CONST0_RTX (mode) || register_operand (op, mode));
}

/* Return non-zero if OP is suitable for use in a call to a named
   function.

   For 2.5 try to eliminate either call_operand_address or
   function_label_operand; they perform very similar functions.  */
int
call_operand_address (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_MODE (op) == word_mode
	  && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (x)
     register rtx x;
{

  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

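/* Return 1 if OP is a SYMBOL_REF or LABEL_REF, or a CONST wrapping
   one of those plus a CONST_INT offset.  */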
int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
	       || GET_CODE (XEXP (op, 0)) == LABEL_REF)
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

/* Return truth value of statement that OP is a symbolic memory
   operand of mode MODE.  */

int
symbolic_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);
  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
	  || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
}

/* Return 1 if the operand is either a register or a memory operand that is
   not symbolic.  */

int
reg_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

/* Return 1 if the operand is either a register, zero, or a memory operand
   that is not symbolic.  */

int
reg_or_0_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (op == CONST0_RTX (mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (intval)
     HOST_WIDE_INT intval;
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (CONST_OK_FOR_LETTER_P (intval, 'J')
	  || CONST_OK_FOR_LETTER_P (intval, 'N')
	  || CONST_OK_FOR_LETTER_P (intval, 'K'));
}

/* Accept anything that can be moved in one instruction into a general
   register.  */
int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONSTANT_P_RTX)
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;

  op = XEXP (op, 0);

  /* We consider a LO_SUM DLT reference a move_operand now since it has
     been merged into the normal movsi/movdi patterns.  */
  if (GET_CODE (op) == LO_SUM
      && GET_CODE (XEXP (op, 0)) == REG
      && REG_OK_FOR_BASE_P (XEXP (op, 0))
      && GET_CODE (XEXP (op, 1)) == UNSPEC
      && GET_MODE (op) == Pmode)
    return 1;

  /* Since move_operand is only used for source operands, we can always
     allow scaled indexing!  */
  if (! TARGET_DISABLE_INDEXING
      && GET_CODE (op) == PLUS
      && ((GET_CODE (XEXP (op, 0)) == MULT
	   && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
	   && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
	   && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode)
	   && GET_CODE (XEXP (op, 1)) == REG)
	  || (GET_CODE (XEXP (op, 1)) == MULT
	      && GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
	      && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
	      && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode)
	      && GET_CODE (XEXP (op, 0)) == REG)))
    return 1;

  return memory_address_p (mode, op);
}

/* Accept REG and any CONST_INT that can be moved in one instruction into a
   general register.  */
int
reg_or_cint_move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  return 0;
}

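/* Return 1 if OP is a label, or a label plus a CONST_INT offset,
   that is legitimate as a PIC address.  Matches nothing when we
   are not generating PIC code.  */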
int
pic_label_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (!flag_pic)
    return 0;

  switch (GET_CODE (op))
    {
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return (GET_CODE (XEXP (op, 0)) == LABEL_REF
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

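/* Return 1 if OP is a floating point register.  This only matches
   once reg_renumber is available, i.e. during and after register
   allocation.  */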
int
fp_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return reg_renumber && FP_REG_P (op);
}

\f

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 14-bit signed integers.  */
int
arith_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
}

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 11-bit signed integers.  */
int
arith11_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
}

/* A constant integer suitable for use in a PRE_MODIFY memory
   reference.  */
int
pre_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
}

/* A constant integer suitable for use in a POST_MODIFY memory
   reference.  */
int
post_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
}

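/* Return 1 if OP is a register of mode MODE, or a CONST_DOUBLE of
   mode MODE whose value is a 14-bit signed integer.  */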
int
arith_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && GET_MODE (op) == mode
	      && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
	      && ((CONST_DOUBLE_HIGH (op) >= 0)
		  == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns, or
   is an integer register.  */

int
ireg_or_int5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
	  || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
}

/* Return nonzero if OP is an integer register, else return zero.  */
int
ireg_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns.  */

int
int5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
}

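/* Return 1 if OP is a CONST_INT that fits in 5 bits as an unsigned
   value.  */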
int
uint5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
}

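/* Return 1 if OP is a CONST_INT that fits in 11 bits as a signed
   value.  */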
int
int11_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
}

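/* Return 1 if OP is a constant whose value is representable in 32
   bits as an unsigned integer.  */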
int
uint32_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
#else
  return (GET_CODE (op) == CONST_INT
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0));
#endif
}

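/* Return 1 if OP is a register of mode MODE, or a CONST_INT that
   fits in 5 bits as a signed value.  */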
int
arith5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || int5_operand (op, mode);
}

/* True iff zdepi can be used to generate this CONST_INT.  */
int
zdepi_cint_p (x)
     unsigned HOST_WIDE_INT x;
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
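  /* For example, x = 0x00f0 gives lsb_mask = 0x10 and
     t = (0x0f + 0x10) & ~0xf = 0x10, a power of two, so zdepi works.
     For x = 0x108 the set bits are too far apart: t = 0x18, which is
     not a power of two, so zdepi can not be used.  */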
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
	0....01....1
	1....10....0
	1..10..01..1  */
int
and_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi or extru can be used to compute (reg & OP).  */
int
and_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
}

/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | OP).  */
int
ior_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
}

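/* Return 1 if OP is a register of mode MODE, or a CONST_INT accepted
   by lhs_lshift_cint_operand below.  */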
int
lhs_lshift_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
}

/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
   Such values can be the left hand side x in (x << r), using the zvdepi
   instruction.  */
int
lhs_lshift_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  unsigned HOST_WIDE_INT x;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  x = INTVAL (op) >> 4;
  return (x & (x + 1)) == 0;
}

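/* Return 1 if OP is a register of mode MODE or any CONST_INT.  */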
int
arith32_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
}

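/* Return 1 if OP is the program counter or a code label reference.  */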
int
pc_or_label_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
}
\f
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig, reg;
     enum machine_mode mode;
{
  rtx pic_ref = orig;

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      if (reg == 0)
	abort ();

      emit_move_insn (reg,
		      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
				    gen_rtx_HIGH (word_mode, orig)));
      pic_ref
	= gen_rtx_MEM (Pmode,
		       gen_rtx_LO_SUM (Pmode, reg,
				       gen_rtx_UNSPEC (Pmode,
						       gen_rtvec (1, orig),
						       0)));

      current_function_uses_pic_offset_table = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      emit_move_insn (reg, pic_ref);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (reg == 0)
	abort ();

      if (GET_CODE (XEXP (orig, 0)) == PLUS)
	{
	  base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
	  orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
					 base == reg ? 0 : reg);
	}
      else
	abort ();
      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant_for_output (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }
  return pic_ref;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if ((<large int> & mask) >= (mask + 1) / 2)
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine can not
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask (on PA 2.0, FP loads and stores also accept 14-bit
   displacements, so the code uses 0x3fff there).

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.
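
   For example, given offset 0x4321 in a MODE_INT reference the mask is
   0x3fff: 0x4321 & 0x3fff = 0x0321, which is less than the halfway
   point 0x2000, so we round down to Y = 0x4000 and the residual
   displacement 0x321 fits directly in a 14-bit field.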

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (x, oldx, mode)
     rtx x, oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  rtx orig = x;

  if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  orig_base = XEXP (XEXP (x, 0), 1);
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  orig_base = XEXP (x, 1);
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */
int
hppa_address_cost (X)
     rtx X;
{
  if (GET_CODE (X) == PLUS)
    return 1;
  else if (GET_CODE (X) == LO_SUM)
    return 1;
  else if (GET_CODE (X) == HIGH)
    return 2;
  return 4;
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (operands, mode, scratch_reg)
     rtx *operands;
     enum machine_mode mode;
     rtx scratch_reg;
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 bits, including
     (subreg (mem (addr))) cases.  */
  if (fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && ! memory_address_p (DFmode, XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
      && scratch_reg)
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  else if (fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& ! memory_address_p (DFmode, XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
	   && scratch_reg)
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand0, 0)),
					  Pmode,
					  XEXP (XEXP (operand0, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.  Fix this for 2.5.  */
  else if (fp_reg_operand (operand0, mode)
	   && CONSTANT_P (operand1)
	   && scratch_reg)
    {
      rtx xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM
	       || GET_CODE (operand1) == CONST_INT
	       || (GET_CODE (operand1) == REG
		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
	   && scratch_reg)
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = gen_rtx_REG (GET_MODE (operand0), REGNO (scratch_reg));

	  emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
						    scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = gen_rtx_REG (GET_MODE (operand0), REGNO (scratch_reg));
	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
	  && !(reload_in_progress || reload_completed))
	{
	  rtx temp = gen_reg_rtx (DFmode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
	  return 1;
	}
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
      if (! (reload_in_progress || reload_completed))
	{
	  operands[0] = validize_mem (operand0);
	  operands[1] = operand1 = force_reg (mode, operand1);
	}
    }

  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
	  && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
	{
	  ishighonly = 1;
	  operand1 = XEXP (operand1, 0);
	}
      if (symbolic_operand (operand1, mode))
	{
	  /* Argh.  The assembler and linker can't handle arithmetic
	     involving plabels.

	     So we force the plabel into memory, load operand0 from
	     the memory location, then add in the constant part.  */
	  if ((GET_CODE (operand1) == CONST
	       && GET_CODE (XEXP (operand1, 0)) == PLUS
	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
	      || function_label_operand (operand1, mode))
	    {
	      rtx temp, const_part;

	      /* Figure out what (if any) scratch register to use.  */
	      if (reload_in_progress || reload_completed)
		{
		  scratch_reg = scratch_reg ? scratch_reg : operand0;
		  /* SCRATCH_REG will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));
		}
	      else if (flag_pic)
		scratch_reg = gen_reg_rtx (Pmode);

	      if (GET_CODE (operand1) == CONST)
		{
		  /* Save away the constant part of the expression.  */
		  const_part = XEXP (XEXP (operand1, 0), 1);
		  if (GET_CODE (const_part) != CONST_INT)
		    abort ();

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
		}
	      else
		{
		  /* No constant part.  */
		  const_part = NULL_RTX;

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, operand1);
		}


	      /* Get the address of the memory location.  PIC-ify it if
		 necessary.  */
	      temp = XEXP (temp, 0);
	      if (flag_pic)
		temp = legitimize_pic_address (temp, mode, scratch_reg);

	      /* Put the address of the memory location into our destination
		 register.  */
	      operands[1] = temp;
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* Now load from the memory location into our destination
		 register.  */
	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* And add back in the constant part.  */
	      if (const_part != NULL_RTX)
		expand_inc (operand0, const_part);

	      return 1;
	    }

	  if (flag_pic)
	    {
	      rtx temp;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = gen_rtx_REG (word_mode, REGNO (temp));
		}
	      else
		temp = gen_reg_rtx (Pmode);

	      /* (const (plus (symbol) (const_int))) must be forced to
		 memory during/after reload if the const_int will not fit
		 in 14 bits.  */
	      if (GET_CODE (operand1) == CONST
		  && GET_CODE (XEXP (operand1, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
		  && (reload_completed || reload_in_progress)
		  && flag_pic)
		{
		  operands[1] = force_const_mem (mode, operand1);
		  operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
							mode, temp);
		  emit_move_sequence (operands, mode, temp);
		}
	      else
		{
		  operands[1] = legitimize_pic_address (operand1, mode, temp);
		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
		}
	    }
	  /* On the HPPA, references to data space are supposed to use dp,
	     register 27, but showing it in the RTL inhibits various cse
	     and loop optimizations.  */
	  else
	    {
	      rtx temp, set;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = gen_rtx_REG (word_mode, REGNO (temp));
		}
	      else
		temp = gen_reg_rtx (mode);

	      /* Loading a SYMBOL_REF into a register makes that register
		 safe to be used as the base in an indexed address.

		 Don't mark hard registers though.  That loses.  */
	      if (GET_CODE (operand0) == REG
		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
		REG_POINTER (operand0) = 1;
	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
		REG_POINTER (temp) = 1;
	      if (ishighonly)
		set = gen_rtx_SET (mode, operand0, temp);
	      else
		set = gen_rtx_SET (VOIDmode,
				   operand0,
				   gen_rtx_LO_SUM (mode, temp, operand1));

	      emit_insn (gen_rtx_SET (VOIDmode,
				      temp,
				      gen_rtx_HIGH (mode, operand1)));
	      emit_insn (set);

	    }
	  return 1;
	}
      else if (GET_CODE (operand1) != CONST_INT
	       || ! cint_ok_for_move (INTVAL (operand1)))
	{
	  rtx temp;
	  int need_zero_extend = 0;

	  if (TARGET_64BIT && GET_CODE (operand1) == CONST_INT
	      && HOST_BITS_PER_WIDE_INT > 32
	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
	    {
	      HOST_WIDE_INT val = INTVAL (operand1);
	      HOST_WIDE_INT nval;

	      /* If the value is the same after a 32->64bit sign
		 extension, then we can use it as-is.  Else we will
		 need to sign extend the constant from 32->64bits
		 then zero extend the result from 32->64bits.  */
	      nval = ((val & (((HOST_WIDE_INT) 2 << 31) - 1))
		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
	      if (val != nval)
		{
		  need_zero_extend = 1;
		  operand1 = GEN_INT (nval);
		}
	    }

	  if (reload_in_progress || reload_completed)
	    temp = operand0;
	  else
	    temp = gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp,
				  gen_rtx_HIGH (mode, operand1)));
	  operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
	  emit_move_insn (operands[0], operands[1]);

	  if (need_zero_extend)
	    {
	      emit_insn (gen_zero_extendsidi2 (operands[0],
					       gen_rtx_SUBREG (SImode,
							       operands[0],
							       0)));
	    }

	  return 1;
	}
    }
  /* Now have insn-emit do whatever it normally does.  */
  return 0;
}

/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
   it will need a link/runtime reloc).  */

int
reloc_needed (exp)
     tree exp;
{
  int reloc = 0;

  switch (TREE_CODE (exp))
    {
    case ADDR_EXPR:
      return 1;

    case PLUS_EXPR:
    case MINUS_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      reloc |= reloc_needed (TREE_OPERAND (exp, 1));
      break;

    case NOP_EXPR:
    case CONVERT_EXPR:
    case NON_LVALUE_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      break;

    case CONSTRUCTOR:
      {
	register tree link;
	for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
	  if (TREE_VALUE (link) != 0)
	    reloc |= reloc_needed (TREE_VALUE (link));
      }
      break;

    case ERROR_MARK:
      break;

    default:
      break;
    }
  return reloc;
}

/* Does operand (which is a symbolic_operand) live in text space?  If
   so, SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true.  */

int
read_only_operand (operand, mode)
     rtx operand;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (operand) == CONST)
    operand = XEXP (XEXP (operand, 0), 0);
  if (flag_pic)
    {
      if (GET_CODE (operand) == SYMBOL_REF)
	return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
    }
  else
    {
      if (GET_CODE (operand) == SYMBOL_REF)
	return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
    }
  return 1;
}

\f
/* Return the best assembler insn template
   for moving operands[1] into operands[0] as a fullword.  */
const char *
singlemove_string (operands)
     rtx *operands;
{
  HOST_WIDE_INT intval;

  if (GET_CODE (operands[0]) == MEM)
    return "stw %r1,%0";
  if (GET_CODE (operands[1]) == MEM)
    return "ldw %1,%0";
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      long i;
      REAL_VALUE_TYPE d;

      if (GET_MODE (operands[1]) != SFmode)
	abort ();

      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
	 bit pattern.  */
      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
      REAL_VALUE_TO_TARGET_SINGLE (d, i);

      operands[1] = GEN_INT (i);
      /* Fall through to CONST_INT case.  */
    }
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      intval = INTVAL (operands[1]);

      if (VAL_14_BITS_P (intval))
	return "ldi %1,%0";
      else if ((intval & 0x7ff) == 0)
	return "ldil L'%1,%0";
      else if (zdepi_cint_p (intval))
	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
      else
	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
    }
  return "copy %1,%0";
}
\f

/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the zdepi
   instructions.  Store the immediate value to insert in OP[0].  */
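/* For example, IMM = 0x70 yields OP[0] = 7, OP[1] = 27, OP[2] = 4:
   deposit the value 7 into a 4-bit field whose low bit is bit 4,
   i.e. position 27 when bits are numbered from the most significant
   end, as the PA deposit instructions number them.  */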
static void
compute_zdepwi_operands (imm, op)
     unsigned HOST_WIDE_INT imm;
     unsigned *op;
{
  int lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < 32; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= 28) ? 4 : 32 - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < 32; len++)
	{
	  if ((imm & (1 << len)) == 0)
	    break;
	}

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 31 - lsb;
  op[2] = len;
}

/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the depdi,z
   instructions.  Store the immediate value to insert in OP[0].  */
void
compute_zdepdi_operands (imm, op)
     unsigned HOST_WIDE_INT imm;
     unsigned *op;
{
  HOST_WIDE_INT lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
	   ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
	{
	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
	    break;
	}

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 63 - lsb;
  op[2] = len;
}
1833
1834 /* Output assembler code to perform a doubleword move insn
1835 with operands OPERANDS. */
1836
1837 const char *
1838 output_move_double (operands)
1839 rtx *operands;
1840 {
1841 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
1842 rtx latehalf[2];
1843 rtx addreg0 = 0, addreg1 = 0;
1844
1845 /* First classify both operands. */
1846
1847 if (REG_P (operands[0]))
1848 optype0 = REGOP;
1849 else if (offsettable_memref_p (operands[0]))
1850 optype0 = OFFSOP;
1851 else if (GET_CODE (operands[0]) == MEM)
1852 optype0 = MEMOP;
1853 else
1854 optype0 = RNDOP;
1855
1856 if (REG_P (operands[1]))
1857 optype1 = REGOP;
1858 else if (CONSTANT_P (operands[1]))
1859 optype1 = CNSTOP;
1860 else if (offsettable_memref_p (operands[1]))
1861 optype1 = OFFSOP;
1862 else if (GET_CODE (operands[1]) == MEM)
1863 optype1 = MEMOP;
1864 else
1865 optype1 = RNDOP;
1866
1867 /* Check for the cases that the operand constraints are not
1868 supposed to allow to happen. Abort if we get one,
1869 because generating code for these cases is painful. */
1870
1871 if (optype0 != REGOP && optype1 != REGOP)
1872 abort ();
1873
1874 /* Handle auto decrementing and incrementing loads and stores
1875 specifically, since the structure of the function doesn't work
1876 for them without major modification. Do it better when we learn
1877 this port about the general inc/dec addressing of PA.
1878 (This was written by tege. Chide him if it doesn't work.) */
1879
1880 if (optype0 == MEMOP)
1881 {
1882 /* We have to output the address syntax ourselves, since print_operand
1883 doesn't deal with the addresses we want to use. Fix this later. */
1884
1885 rtx addr = XEXP (operands[0], 0);
1886 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1887 {
1888 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
1889
1890 operands[0] = XEXP (addr, 0);
1891 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1892 abort ();
1893
1894 if (!reg_overlap_mentioned_p (high_reg, addr))
1895 {
1896 /* No overlap between high target register and address
1897 register. (We do this in a non-obvious way to
1898 save a register file writeback) */
1899 if (GET_CODE (addr) == POST_INC)
1900 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
1901 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
1902 }
1903 else
1904 abort ();
1905 }
1906 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1907 {
1908 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
1909
1910 operands[0] = XEXP (addr, 0);
1911 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1912 abort ();
1913
1914 if (!reg_overlap_mentioned_p (high_reg, addr))
1915 {
1916 /* No overlap between high target register and address
1917 register. (We do this in a non-obvious way to
1918 save a register file writeback) */
1919 if (GET_CODE (addr) == PRE_INC)
1920 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
1921 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
1922 }
1923 else
1924 abort ();
1925 }
1926 }
1927 if (optype1 == MEMOP)
1928 {
1929 /* We have to output the address syntax ourselves, since print_operand
1930 doesn't deal with the addresses we want to use. Fix this later. */
1931
1932 rtx addr = XEXP (operands[1], 0);
1933 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1934 {
1935 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
1936
1937 operands[1] = XEXP (addr, 0);
1938 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1939 abort ();
1940
1941 if (!reg_overlap_mentioned_p (high_reg, addr))
1942 {
1943 /* No overlap between high target register and address
1944 register. (We do this in a non-obvious way to
1945 save a register file writeback) */
1946 if (GET_CODE (addr) == POST_INC)
1947 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
1948 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
1949 }
1950 else
1951 {
1952 /* This is an undefined situation. We should load into the
1953 address register *and* update that register. Probably
1954 we don't need to handle this at all. */
1955 if (GET_CODE (addr) == POST_INC)
1956 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
1957 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
1958 }
1959 }
1960 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1961 {
1962 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
1963
1964 operands[1] = XEXP (addr, 0);
1965 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1966 abort ();
1967
1968 if (!reg_overlap_mentioned_p (high_reg, addr))
1969 {
1970 /* No overlap between high target register and address
1971 register. (We do this in a non-obvious way to
1972 save a register file writeback) */
1973 if (GET_CODE (addr) == PRE_INC)
1974 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
1975 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
1976 }
1977 else
1978 {
1979 /* This is an undefined situation. We should load into the
1980 address register *and* update that register. Probably
1981 we don't need to handle this at all. */
1982 if (GET_CODE (addr) == PRE_INC)
1983 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
1984 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
1985 }
1986 }
1987 else if (GET_CODE (addr) == PLUS
1988 && GET_CODE (XEXP (addr, 0)) == MULT)
1989 {
1990 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
1991
1992 if (!reg_overlap_mentioned_p (high_reg, addr))
1993 {
1994 rtx xoperands[4];
1995
1996 xoperands[0] = high_reg;
1997 xoperands[1] = XEXP (addr, 1);
1998 xoperands[2] = XEXP (XEXP (addr, 0), 0);
1999 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2000 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2001 xoperands);
2002 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2003 }
2004 else
2005 {
2006 rtx xoperands[4];
2007
2008 xoperands[0] = high_reg;
2009 xoperands[1] = XEXP (addr, 1);
2010 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2011 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2012 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2013 xoperands);
2014 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2015 }
2016 }
2017 }
2018
2019 /* If an operand is an unoffsettable memory ref, find a register
2020 we can increment temporarily to make it refer to the second word. */
2021
2022 if (optype0 == MEMOP)
2023 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2024
2025 if (optype1 == MEMOP)
2026 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2027
2028 /* Ok, we can do one word at a time.
2029 Normally we do the low-numbered word first.
2030
2031 Set up in LATEHALF the operands to use
2032 for the high-numbered word and in some cases alter the
2033 operands in OPERANDS to be suitable for the low-numbered word. */
2034
2035 if (optype0 == REGOP)
2036 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2037 else if (optype0 == OFFSOP)
2038 latehalf[0] = adj_offsettable_operand (operands[0], 4);
2039 else
2040 latehalf[0] = operands[0];
2041
2042 if (optype1 == REGOP)
2043 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2044 else if (optype1 == OFFSOP)
2045 latehalf[1] = adj_offsettable_operand (operands[1], 4);
2046 else if (optype1 == CNSTOP)
2047 split_double (operands[1], &operands[1], &latehalf[1]);
2048 else
2049 latehalf[1] = operands[1];
2050
2051 /* If the first move would clobber the source of the second one,
2052 do them in the other order.
2053
2054 This can happen in two cases:
2055
2056 mem -> register where the first half of the destination register
2057 is the same register used in the memory's address. Reload
2058 can create such insns.
2059
2060 mem in this case will be either register indirect or register
2061 indirect plus a valid offset.
2062
2063 register -> register move where REGNO(dst) == REGNO(src + 1)
2064 someone (Tim/Tege?) claimed this can happen for parameter loads.
2065
2066 Handle mem -> register case first. */
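/* For example, a DImode load (set (reg:DI 4) (mem:DI (reg:SI 4)))
   must do the late half first: emitting the low word first would
   clobber %r4 while it is still needed as the address of the high
   word.  We emit "ldw 4(%r4),%r5" and then "ldw 0(%r4),%r4".  */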
2067 if (optype0 == REGOP
2068 && (optype1 == MEMOP || optype1 == OFFSOP)
2069 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2070 operands[1], 0))
2071 {
2072 /* Do the late half first. */
2073 if (addreg1)
2074 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2075 output_asm_insn (singlemove_string (latehalf), latehalf);
2076
2077 /* Then clobber. */
2078 if (addreg1)
2079 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2080 return singlemove_string (operands);
2081 }
2082
2083 /* Now handle register -> register case. */
2084 if (optype0 == REGOP && optype1 == REGOP
2085 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2086 {
2087 output_asm_insn (singlemove_string (latehalf), latehalf);
2088 return singlemove_string (operands);
2089 }
2090
2091 /* Normal case: do the two words, low-numbered first. */
2092
2093 output_asm_insn (singlemove_string (operands), operands);
2094
2095 /* Make any unoffsettable addresses point at high-numbered word. */
2096 if (addreg0)
2097 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2098 if (addreg1)
2099 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2100
2101 /* Do that word. */
2102 output_asm_insn (singlemove_string (latehalf), latehalf);
2103
2104 /* Undo the adds we just did. */
2105 if (addreg0)
2106 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2107 if (addreg1)
2108 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2109
2110 return "";
2111 }
2112 \f
2113 const char *
2114 output_fp_move_double (operands)
2115 rtx *operands;
2116 {
2117 if (FP_REG_P (operands[0]))
2118 {
2119 if (FP_REG_P (operands[1])
2120 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2121 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2122 else
2123 output_asm_insn ("fldd%F1 %1,%0", operands);
2124 }
2125 else if (FP_REG_P (operands[1]))
2126 {
2127 output_asm_insn ("fstd%F0 %1,%0", operands);
2128 }
2129 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2130 {
2131 if (GET_CODE (operands[0]) == REG)
2132 {
2133 rtx xoperands[2];
2134 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2135 xoperands[0] = operands[0];
2136 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2137 }
2138 /* This is a pain. You have to be prepared to deal with an
2139 arbitrary address here, including pre/post increment/decrement,
2140 so avoid this in the MD. */
2142 else
2143 abort ();
2144 }
2145 else abort ();
2146 return "";
2147 }
2148 \f
2149 /* Return a REG that occurs in ADDR with coefficient 1.
2150 ADDR can be effectively incremented by incrementing REG. */
2151
2152 static rtx
2153 find_addr_reg (addr)
2154 rtx addr;
2155 {
2156 while (GET_CODE (addr) == PLUS)
2157 {
2158 if (GET_CODE (XEXP (addr, 0)) == REG)
2159 addr = XEXP (addr, 0);
2160 else if (GET_CODE (XEXP (addr, 1)) == REG)
2161 addr = XEXP (addr, 1);
2162 else if (CONSTANT_P (XEXP (addr, 0)))
2163 addr = XEXP (addr, 1);
2164 else if (CONSTANT_P (XEXP (addr, 1)))
2165 addr = XEXP (addr, 0);
2166 else
2167 abort ();
2168 }
2169 if (GET_CODE (addr) == REG)
2170 return addr;
2171 abort ();
2172 }
2173
2174 /* Emit code to perform a block move.
2175
2176 OPERANDS[0] is the destination pointer as a REG, clobbered.
2177 OPERANDS[1] is the source pointer as a REG, clobbered.
2178 OPERANDS[2] is a register for temporary storage.
2179 OPERANDS[3] is a register for temporary storage.
2180 OPERANDS[4] is the size as a CONST_INT.
2181 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2182 OPERANDS[6] is another temporary register. */
2183
2184 const char *
2185 output_block_move (operands, size_is_constant)
2186 rtx *operands;
2187 int size_is_constant ATTRIBUTE_UNUSED;
2188 {
2189 int align = INTVAL (operands[5]);
2190 unsigned long n_bytes = INTVAL (operands[4]);
2191
2192 /* We can't move more than four bytes at a time because the PA
2193 has no integer move insns longer than a word. (Could use fp mem ops?) */
2194 if (align > 4)
2195 align = 4;
2196
2197 /* Note that we know each loop below will execute at least twice
2198 (else we would have open-coded the copy). */
2199 switch (align)
2200 {
2201 case 4:
2202 /* Pre-adjust the loop counter. */
2203 operands[4] = GEN_INT (n_bytes - 8);
2204 output_asm_insn ("ldi %4,%2", operands);
2205
2206 /* Copying loop. */
2207 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2208 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2209 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2210 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2211 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2212
2213 /* Handle the residual. There could be up to 7 bytes of
2214 residual to copy! */
2215 if (n_bytes % 8 != 0)
2216 {
2217 operands[4] = GEN_INT (n_bytes % 4);
2218 if (n_bytes % 8 >= 4)
2219 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2220 if (n_bytes % 4 != 0)
2221 output_asm_insn ("ldw 0(%1),%6", operands);
2222 if (n_bytes % 8 >= 4)
2223 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2224 if (n_bytes % 4 != 0)
2225 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2226 }
2227 return "";
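/* For example, ALIGN == 4 with a 20 byte copy emits

	ldi 12,%2
	{ldws|ldw},ma 4(%1),%3
	{ldws|ldw},ma 4(%1),%6
	{stws|stw},ma %3,4(%0)
	addib,>= -8,%2,.-12
	{stws|stw},ma %6,4(%0)
	{ldws|ldw},ma 4(%1),%3
	{stws|stw},ma %3,4(%0)

   The loop runs twice (16 bytes); the residual 20 % 8 == 4 bytes
   take the final load/store pair, and since 20 % 4 == 0 no stby
   is needed.  */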
2228
2229 case 2:
2230 /* Pre-adjust the loop counter. */
2231 operands[4] = GEN_INT (n_bytes - 4);
2232 output_asm_insn ("ldi %4,%2", operands);
2233
2234 /* Copying loop. */
2235 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2236 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2237 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2238 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2239 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2240
2241 /* Handle the residual. */
2242 if (n_bytes % 4 != 0)
2243 {
2244 if (n_bytes % 4 >= 2)
2245 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2246 if (n_bytes % 2 != 0)
2247 output_asm_insn ("ldb 0(%1),%6", operands);
2248 if (n_bytes % 4 >= 2)
2249 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2250 if (n_bytes % 2 != 0)
2251 output_asm_insn ("stb %6,0(%0)", operands);
2252 }
2253 return "";
2254
2255 case 1:
2256 /* Pre-adjust the loop counter. */
2257 operands[4] = GEN_INT (n_bytes - 2);
2258 output_asm_insn ("ldi %4,%2", operands);
2259
2260 /* Copying loop. */
2261 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2262 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2263 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2264 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2265 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2266
2267 /* Handle the residual. */
2268 if (n_bytes % 2 != 0)
2269 {
2270 output_asm_insn ("ldb 0(%1),%3", operands);
2271 output_asm_insn ("stb %3,0(%0)", operands);
2272 }
2273 return "";
2274
2275 default:
2276 abort ();
2277 }
2278 }
2279
2280 /* Count the number of insns necessary to handle this block move.
2281
2282 Basic structure is the same as output_block_move, except that we
2283 count insns rather than emit them. */
2284
2285 static int
2286 compute_movstrsi_length (insn)
2287 rtx insn;
2288 {
2289 rtx pat = PATTERN (insn);
2290 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2291 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
2292 unsigned int n_insns = 0;
2293
2294 /* We can't move more than four bytes at a time because the PA
2295 has no integer move insns longer than a word. (Could use fp mem ops?) */
2296 if (align > 4)
2297 align = 4;
2298
2299 /* The basic copying loop. */
2300 n_insns = 6;
2301
2302 /* Residuals. */
2303 if (n_bytes % (2 * align) != 0)
2304 {
2305 if ((n_bytes % (2 * align)) >= align)
2306 n_insns += 2;
2307
2308 if ((n_bytes % align) != 0)
2309 n_insns += 2;
2310 }
2311
2312 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2313 return n_insns * 4;
2314 }
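/* Continuing the example from output_block_move: ALIGN == 4 and
   20 bytes gives the 6 insn loop plus 2 residual insns
   (20 % 8 >= 4), i.e. 8 insns or 32 bytes.  */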
2315 \f
2316
2317 const char *
2318 output_and (operands)
2319 rtx *operands;
2320 {
2321 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2322 {
2323 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2324 int ls0, ls1, ms0, p, len;
2325
2326 for (ls0 = 0; ls0 < 32; ls0++)
2327 if ((mask & (1 << ls0)) == 0)
2328 break;
2329
2330 for (ls1 = ls0; ls1 < 32; ls1++)
2331 if ((mask & (1 << ls1)) != 0)
2332 break;
2333
2334 for (ms0 = ls1; ms0 < 32; ms0++)
2335 if ((mask & (1 << ms0)) == 0)
2336 break;
2337
2338 if (ms0 != 32)
2339 abort ();
2340
2341 if (ls1 == 32)
2342 {
2343 len = ls0;
2344
2345 if (len == 0)
2346 abort ();
2347
2348 operands[2] = GEN_INT (len);
2349 return "{extru|extrw,u} %1,31,%2,%0";
2350 }
2351 else
2352 {
2353 /* We could use this `depi' for the case above as well, but `depi'
2354 requires one more register file access than an `extru'. */
2355
2356 p = 31 - ls0;
2357 len = ls1 - ls0;
2358
2359 operands[2] = GEN_INT (p);
2360 operands[3] = GEN_INT (len);
2361 return "{depi|depwi} 0,%2,%3,%0";
2362 }
2363 }
2364 else
2365 return "and %1,%2,%0";
2366 }
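/* For example, mask 0x7f is a bitstring at the low end of the word
   (ls1 == 32), so we extract it: "extru %1,31,7,%0".  Mask
   0xffff00ff has a single zero field at bits 8..15, so we deposit
   zeros over it: "depwi 0,23,8,%0" (position 31 - 8, length 8).  */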
2367
2368 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2369 storing the result in operands[0]. */
2370 const char *
2371 output_64bit_and (operands)
2372 rtx *operands;
2373 {
2374 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2375 {
2376 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2377 int ls0, ls1, ms0, p, len;
2378
2379 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2380 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2381 break;
2382
2383 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2384 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2385 break;
2386
2387 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2388 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2389 break;
2390
2391 if (ms0 != HOST_BITS_PER_WIDE_INT)
2392 abort ();
2393
2394 if (ls1 == HOST_BITS_PER_WIDE_INT)
2395 {
2396 len = ls0;
2397
2398 if (len == 0)
2399 abort ();
2400
2401 operands[2] = GEN_INT (len);
2402 return "extrd,u %1,63,%2,%0";
2403 }
2404 else
2405 {
2406 /* We could use this `depdi' for the case above as well, but `depdi'
2407 requires one more register file access than an `extrd,u'. */
2408
2409 p = 63 - ls0;
2410 len = ls1 - ls0;
2411
2412 operands[2] = GEN_INT (p);
2413 operands[3] = GEN_INT (len);
2414 return "depdi 0,%2,%3,%0";
2415 }
2416 }
2417 else
2418 return "and %1,%2,%0";
2419 }
2420
2421 const char *
2422 output_ior (operands)
2423 rtx *operands;
2424 {
2425 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2426 int bs0, bs1, p, len;
2427
2428 if (INTVAL (operands[2]) == 0)
2429 return "copy %1,%0";
2430
2431 for (bs0 = 0; bs0 < 32; bs0++)
2432 if ((mask & (1 << bs0)) != 0)
2433 break;
2434
2435 for (bs1 = bs0; bs1 < 32; bs1++)
2436 if ((mask & (1 << bs1)) == 0)
2437 break;
2438
2439 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2440 abort ();
2441
2442 p = 31 - bs0;
2443 len = bs1 - bs0;
2444
2445 operands[2] = GEN_INT (p);
2446 operands[3] = GEN_INT (len);
2447 return "{depi|depwi} -1,%2,%3,%0";
2448 }
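/* For example, mask 0xff0 has its bitstring at bits 4..11, so
   p == 31 - 4 == 27 and len == 8, giving "depwi -1,27,8,%0".
   A mask with more than one bitstring (e.g. 0x505) aborts.  */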
2449
2450 /* Return a string to perform a bitwise-ior of operands[1] with operands[2]
2451 storing the result in operands[0]. */
2452 const char *
2453 output_64bit_ior (operands)
2454 rtx *operands;
2455 {
2456 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2457 int bs0, bs1, p, len;
2458
2459 if (INTVAL (operands[2]) == 0)
2460 return "copy %1,%0";
2461
2462 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2463 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
2464 break;
2465
2466 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2467 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
2468 break;
2469
2470 if (bs1 != HOST_BITS_PER_WIDE_INT
2471 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2472 abort ();
2473
2474 p = 63 - bs0;
2475 len = bs1 - bs0;
2476
2477 operands[2] = GEN_INT (p);
2478 operands[3] = GEN_INT (len);
2479 return "depdi -1,%2,%3,%0";
2480 }
2481 \f
2482 /* Output an ASCII string. */
2483 void
2484 output_ascii (file, p, size)
2485 FILE *file;
2486 const unsigned char *p;
2487 int size;
2488 {
2489 int i;
2490 int chars_output;
2491 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
2492
2493 /* The HP assembler can only take strings of 256 characters at one
2494 time. This is a limitation on input line length, *not* the
2495 length of the string. Sigh. Even worse, it seems that the
2496 restriction is in number of input characters (see \xnn &
2497 \whatever). So we have to do this very carefully. */
2498
2499 fputs ("\t.STRING \"", file);
2500
2501 chars_output = 0;
2502 for (i = 0; i < size; i += 4)
2503 {
2504 int co = 0;
2505 int io = 0;
2506 for (io = 0, co = 0; io < MIN (4, size - i); io++)
2507 {
2508 register unsigned int c = p[i + io];
2509
2510 if (c == '\"' || c == '\\')
2511 partial_output[co++] = '\\';
2512 if (c >= ' ' && c < 0177)
2513 partial_output[co++] = c;
2514 else
2515 {
2516 unsigned int hexd;
2517 partial_output[co++] = '\\';
2518 partial_output[co++] = 'x';
2519 hexd = c / 16 - 0 + '0';
2520 if (hexd > '9')
2521 hexd -= '9' - 'a' + 1;
2522 partial_output[co++] = hexd;
2523 hexd = c % 16 - 0 + '0';
2524 if (hexd > '9')
2525 hexd -= '9' - 'a' + 1;
2526 partial_output[co++] = hexd;
2527 }
2528 }
2529 if (chars_output + co > 243)
2530 {
2531 fputs ("\"\n\t.STRING \"", file);
2532 chars_output = 0;
2533 }
2534 fwrite (partial_output, 1, co, file);
2535 chars_output += co;
2536 co = 0;
2537 }
2538 fputs ("\"\n", file);
2539 }
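/* For example, the four bytes 'H', 'i', '"', '\n' come out as

	.STRING "Hi\"\x0a"

   The quote is escaped and the newline becomes a two-digit hex
   escape; the worst case is four output characters per input
   character, hence the 16 byte PARTIAL_OUTPUT buffer.  */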
2540
2541 /* Try to rewrite floating point comparisons & branches to avoid
2542 useless add,tr insns.
2543
2544 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2545 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2546 first attempt to remove useless add,tr insns. It is zero
2547 for the second pass as reorg sometimes leaves bogus REG_DEAD
2548 notes lying around.
2549
2550 When CHECK_NOTES is zero we can only eliminate add,tr insns
2551 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2552 instructions. */
2553 static void
2554 remove_useless_addtr_insns (insns, check_notes)
2555 rtx insns;
2556 int check_notes;
2557 {
2558 rtx insn;
2559 static int pass = 0;
2560
2561 /* This is fairly cheap, so always run it when optimizing. */
2562 if (optimize > 0)
2563 {
2564 int fcmp_count = 0;
2565 int fbranch_count = 0;
2566
2567 /* Walk all the insns in this function looking for fcmp & fbranch
2568 instructions. Keep track of how many of each we find. */
2569 insns = get_insns ();
2570 for (insn = insns; insn; insn = next_insn (insn))
2571 {
2572 rtx tmp;
2573
2574 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2575 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2576 continue;
2577
2578 tmp = PATTERN (insn);
2579
2580 /* It must be a set. */
2581 if (GET_CODE (tmp) != SET)
2582 continue;
2583
2584 /* If the destination is CCFP, then we've found an fcmp insn. */
2585 tmp = SET_DEST (tmp);
2586 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2587 {
2588 fcmp_count++;
2589 continue;
2590 }
2591
2592 tmp = PATTERN (insn);
2593 /* If this is an fbranch instruction, bump the fbranch counter. */
2594 if (GET_CODE (tmp) == SET
2595 && SET_DEST (tmp) == pc_rtx
2596 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2597 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2598 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2599 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2600 {
2601 fbranch_count++;
2602 continue;
2603 }
2604 }
2605
2606
2607 /* Find all floating point compare + branch insns. If possible,
2608 reverse the comparison & the branch to avoid add,tr insns. */
2609 for (insn = insns; insn; insn = next_insn (insn))
2610 {
2611 rtx tmp, next;
2612
2613 /* Ignore anything that isn't an INSN. */
2614 if (GET_CODE (insn) != INSN)
2615 continue;
2616
2617 tmp = PATTERN (insn);
2618
2619 /* It must be a set. */
2620 if (GET_CODE (tmp) != SET)
2621 continue;
2622
2623 /* The destination must be CCFP, which is register zero. */
2624 tmp = SET_DEST (tmp);
2625 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2626 continue;
2627
2628 /* INSN should be a set of CCFP.
2629
2630 See if the result of this insn is used in a reversed FP
2631 conditional branch. If so, reverse our condition and
2632 the branch. Doing so avoids useless add,tr insns. */
2633 next = next_insn (insn);
2634 while (next)
2635 {
2636 /* Jumps, calls and labels stop our search. */
2637 if (GET_CODE (next) == JUMP_INSN
2638 || GET_CODE (next) == CALL_INSN
2639 || GET_CODE (next) == CODE_LABEL)
2640 break;
2641
2642 /* As does another fcmp insn. */
2643 if (GET_CODE (next) == INSN
2644 && GET_CODE (PATTERN (next)) == SET
2645 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2646 && REGNO (SET_DEST (PATTERN (next))) == 0)
2647 break;
2648
2649 next = next_insn (next);
2650 }
2651
2652 /* Is NEXT_INSN a branch? */
2653 if (next
2654 && GET_CODE (next) == JUMP_INSN)
2655 {
2656 rtx pattern = PATTERN (next);
2657
2658 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
2659 and CCFP dies, then reverse our conditional and the branch
2660 to avoid the add,tr. */
2661 if (GET_CODE (pattern) == SET
2662 && SET_DEST (pattern) == pc_rtx
2663 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2664 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2665 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2666 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2667 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2668 && (fcmp_count == fbranch_count
2669 || (check_notes
2670 && find_regno_note (next, REG_DEAD, 0))))
2671 {
2672 /* Reverse the branch. */
2673 tmp = XEXP (SET_SRC (pattern), 1);
2674 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2675 XEXP (SET_SRC (pattern), 2) = tmp;
2676 INSN_CODE (next) = -1;
2677
2678 /* Reverse our condition. */
2679 tmp = PATTERN (insn);
2680 PUT_CODE (XEXP (tmp, 1),
2681 reverse_condition_maybe_unordered
2682 (GET_CODE (XEXP (tmp, 1))));
2683 }
2684 }
2685 }
2686 }
2687
2688 pass = !pass;
2689
2690 }
2691 \f
2692 /* You may have trouble believing this, but this is the 32 bit HP-PA stack
2693 layout. Wow.
2694
2695 Offset Contents
2696
2697 Variable arguments (optional; any number may be allocated)
2698
2699 SP-(4*(N+9)) arg word N
2700 : :
2701 SP-56 arg word 5
2702 SP-52 arg word 4
2703
2704 Fixed arguments (must be allocated; may remain unused)
2705
2706 SP-48 arg word 3
2707 SP-44 arg word 2
2708 SP-40 arg word 1
2709 SP-36 arg word 0
2710
2711 Frame Marker
2712
2713 SP-32 External Data Pointer (DP)
2714 SP-28 External sr4
2715 SP-24 External/stub RP (RP')
2716 SP-20 Current RP
2717 SP-16 Static Link
2718 SP-12 Clean up
2719 SP-8 Calling Stub RP (RP'')
2720 SP-4 Previous SP
2721
2722 Top of Frame
2723
2724 SP-0 Stack Pointer (points to next available address)
2725
2726 */
2727
2728 /* This function saves registers as follows. Registers marked with ' are
2729 this function's registers (as opposed to the previous function's).
2730 If a frame pointer isn't needed, %r3 is saved as a general register;
2731 the space for the frame pointer is still allocated, though, to keep
2732 things simple.
2733
2734
2735 Top of Frame
2736
2737 SP (FP') Previous FP
2738 SP + 4 Alignment filler (sigh)
2739 SP + 8 Space for locals reserved here.
2740 .
2741 .
2742 .
2743 SP + n All call saved registers used.
2744 .
2745 .
2746 .
2747 SP + o All call saved fp registers used.
2748 .
2749 .
2750 .
2751 SP + p (SP') points to next available address.
2752
2753 */
2754
2755 /* Global variables set by FUNCTION_PROLOGUE. */
2756 /* Size of frame. Need to know this to emit return insns from
2757 leaf procedures. */
2758 static int actual_fsize;
2759 static int local_fsize, save_fregs;
2760
2761 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2762 Handle case where DISP > 8k by using the add_high_const patterns.
2763
2764 Note in DISP > 8k case, we will leave the high part of the address
2765 in %r1. There is code in hppa_expand_{prologue,epilogue} that knows this. */
2766
2767 static rtx
2768 store_reg (reg, disp, base)
2769 int reg, disp, base;
2770 {
2771 rtx i, dest, src, basereg;
2772
2773 src = gen_rtx_REG (word_mode, reg);
2774 basereg = gen_rtx_REG (Pmode, base);
2775 if (VAL_14_BITS_P (disp))
2776 {
2777 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
2778 i = emit_move_insn (dest, src);
2779 }
2780 else
2781 {
2782 rtx delta = GEN_INT (disp);
2783 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
2784 rtx tmpreg = gen_rtx_REG (Pmode, 1);
2785 emit_move_insn (tmpreg, high);
2786 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
2787 i = emit_move_insn (dest, src);
2788 }
2789 return i;
2790 }
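/* For example, storing %r3 at sp + 16384 does not fit in 14 bits,
   so the code above emits (roughly) the equivalent of

	addil L'16384,%r30
	stw %r3,R'16384(%r1)

   leaving the high part of the address in %r1 as noted above.  */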
2791
2792 /* Emit RTL to set REG to the value specified by BASE+DISP.
2793 Handle case where DISP > 8k by using the add_high_const patterns.
2794
2795 Note in DISP > 8k case, we will leave the high part of the address
2796 in %r1. There is code in hppa_expand_{prologue,epilogue} that knows this. */
2797
2798 static rtx
2799 set_reg_plus_d (reg, base, disp)
2800 int reg, base, disp;
2801 {
2802 rtx i;
2803
2804 if (VAL_14_BITS_P (disp))
2805 {
2806 i = emit_move_insn (gen_rtx_REG (Pmode, reg),
2807 plus_constant (gen_rtx_REG (Pmode, base), disp));
2808 }
2809 else
2810 {
2811 rtx delta = GEN_INT (disp);
2812 emit_move_insn (gen_rtx_REG (Pmode, 1),
2813 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, base),
2814 gen_rtx_HIGH (Pmode, delta)));
2815 i = emit_move_insn (gen_rtx_REG (Pmode, reg),
2816 gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 1),
2817 delta));
2818 }
2819 return i;
2820 }
2821
2822 int
2823 compute_frame_size (size, fregs_live)
2824 int size;
2825 int *fregs_live;
2826 {
2827 int i, fsize;
2828
2829 /* Space for frame pointer + filler. If any frame is allocated
2830 we need to add this in because of STARTING_FRAME_OFFSET.
2831
2832 Similar code also appears in hppa_expand_prologue. Change both
2833 of them at the same time. */
2834 fsize = size + (size || frame_pointer_needed ? STARTING_FRAME_OFFSET : 0);
2835
2836 /* Account for space used by the callee general register saves. */
2837 for (i = 18; i >= 3; i--)
2838 if (regs_ever_live[i])
2839 fsize += UNITS_PER_WORD;
2840
2841 /* Round the stack. */
2842 fsize = (fsize + 7) & ~7;
2843
2844 /* Account for space used by the callee floating point register saves. */
2845 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
2846 if (regs_ever_live[i]
2847 || (! TARGET_64BIT && regs_ever_live[i + 1]))
2848 {
2849 if (fregs_live)
2850 *fregs_live = 1;
2851
2852 /* We always save both halves of the FP register, so always
2853 increment the frame size by 8 bytes. */
2854 fsize += 8;
2855 }
2856
2857 /* The various ABIs include space for the outgoing parameters in the
2858 size of the current function's stack frame. */
2859 fsize += current_function_outgoing_args_size;
2860
2861 /* Allocate space for the fixed frame marker. This space must be
2862 allocated for any function that makes calls or otherwise allocates
2863 stack space. */
2864 if (!current_function_is_leaf || fsize)
2865 fsize += 32;
2866
2867 return (fsize + STACK_BOUNDARY - 1) & ~(STACK_BOUNDARY - 1);
2868 }
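/* A worked example, assuming the 32-bit ABI (STARTING_FRAME_OFFSET
   of 8, 64 byte final stack alignment): 40 bytes of locals with
   %r3-%r5 live, no FP saves, 16 bytes of outgoing arguments and at
   least one call gives 40 + 8 + 12 = 60, rounded to 64, plus
   16 + 32 = 112, which the final rounding pushes to 128.  */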
2869
2870 void
2871 output_function_prologue (file, size)
2872 FILE *file;
2873 int size ATTRIBUTE_UNUSED;
2874 {
2875 /* The function's label and associated .PROC must never be
2876 separated and must be output *after* any profiling declarations
2877 to avoid changing spaces/subspaces within a procedure. */
2878 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
2879 fputs ("\t.PROC\n", file);
2880
2881 /* hppa_expand_prologue does the dirty work now. We just need
2882 to output the assembler directives which denote the start
2883 of a function. */
2884 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
2885 if (regs_ever_live[2])
2886 fputs (",CALLS,SAVE_RP", file);
2887 else
2888 fputs (",NO_CALLS", file);
2889
2890 if (frame_pointer_needed)
2891 fputs (",SAVE_SP", file);
2892
2893 /* Pass on information about the number of callee register saves
2894 performed in the prologue.
2895
2896 The compiler is supposed to pass the highest register number
2897 saved, the assembler then has to adjust that number before
2898 entering it into the unwind descriptor (to account for any
2899 caller saved registers with lower register numbers than the
2900 first callee saved register). */
2901 if (gr_saved)
2902 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
2903
2904 if (fr_saved)
2905 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
2906
2907 fputs ("\n\t.ENTRY\n", file);
2908
2909 /* If we're using GAS and not using the portable runtime model, then
2910 we don't need to accumulate the total number of code bytes. */
2911 if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
2912 total_code_bytes = 0;
2913 else if (INSN_ADDRESSES_SET_P ())
2914 {
2915 unsigned int old_total = total_code_bytes;
2916
2917 total_code_bytes += INSN_ADDRESSES (INSN_UID (get_last_insn()));
2918 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
2919
2920 /* Be prepared to handle overflows. */
2921 total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
2922 }
2923 else
2924 total_code_bytes = -1;
2925
2926 remove_useless_addtr_insns (get_insns (), 0);
2927 }
2928
2929 #if DO_FRAME_NOTES
2930 #define FRP(INSN) \
2931 do \
2932 { \
2933 rtx insn = INSN; \
2934 RTX_FRAME_RELATED_P (insn) = 1; \
2935 } \
2936 while (0)
2937 #else
2938 #define FRP(INSN) INSN
2939 #endif
2940
2941 void
2942 hppa_expand_prologue ()
2943 {
2944 extern char call_used_regs[];
2945 int size = get_frame_size ();
2946 int merge_sp_adjust_with_store = 0;
2947 int i, offset;
2948 rtx tmpreg, size_rtx;
2949
2950 gr_saved = 0;
2951 fr_saved = 0;
2952 save_fregs = 0;
2953
2954 /* Allocate space for frame pointer + filler. If any frame is allocated
2955 we need to add this in because of STARTING_FRAME_OFFSET.
2956
2957 Similar code also appears in compute_frame_size. Change both
2958 of them at the same time. */
2959 local_fsize = size + (size || frame_pointer_needed
2960 ? STARTING_FRAME_OFFSET : 0);
2961
2962 actual_fsize = compute_frame_size (size, &save_fregs);
2963
2964 /* Compute a few things we will use often. */
2965 tmpreg = gen_rtx_REG (word_mode, 1);
2966 size_rtx = GEN_INT (actual_fsize);
2967
2968 /* Save RP first. The calling conventions manual states RP will
2969 always be stored into the caller's frame at sp - 20 or sp - 16
2970 depending on which ABI is in use. */
2971 if (regs_ever_live[2])
2972 FRP (store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM));
2973
2974 /* Allocate the local frame and set up the frame pointer if needed. */
2975 if (actual_fsize != 0)
2976 {
2977 if (frame_pointer_needed)
2978 {
2979 /* Copy the old frame pointer temporarily into %r1. Set up the
2980 new stack pointer, then store away the saved old frame pointer
2981 into the stack at sp+actual_fsize and at the same time update
2982 the stack pointer by actual_fsize bytes. Two versions, first
2983 handles small (<8k) frames. The second handles large (>=8k)
2984 frames. */
2985 emit_move_insn (tmpreg, frame_pointer_rtx);
2986 FRP (emit_move_insn (frame_pointer_rtx, stack_pointer_rtx));
2987 if (VAL_14_BITS_P (actual_fsize))
2988 {
2989 rtx insn = emit_insn (gen_post_store (stack_pointer_rtx, tmpreg,
2990 size_rtx));
2991 if (DO_FRAME_NOTES)
2992 {
2993 rtvec vec;
2994 RTX_FRAME_RELATED_P (insn) = 1;
2995 vec = gen_rtvec (2,
2996 gen_rtx_SET (VOIDmode,
2997 gen_rtx_MEM (word_mode,
2998 stack_pointer_rtx),
2999 frame_pointer_rtx),
3000 gen_rtx_SET (VOIDmode,
3001 stack_pointer_rtx,
3002 gen_rtx_PLUS (word_mode,
3003 stack_pointer_rtx,
3004 size_rtx)));
3005 REG_NOTES (insn)
3006 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3007 gen_rtx_SEQUENCE (VOIDmode, vec),
3008 REG_NOTES (insn));
3009 }
3010 }
3011 else
3012 {
3013 /* It is incorrect to store the saved frame pointer at *sp,
3014 then increment sp (writes beyond the current stack boundary).
3015
3016 So instead use stwm to store at *sp and post-increment the
3017 stack pointer as an atomic operation. Then increment sp to
3018 finish allocating the new frame. */
3019 int adjust1 = 8192 - 64;
3020 int adjust2 = actual_fsize - adjust1;
3021 rtx delta = GEN_INT (adjust1);
3022 rtx insn = emit_insn (gen_post_store (stack_pointer_rtx, tmpreg,
3023 delta));
3024 if (DO_FRAME_NOTES)
3025 {
3026 rtvec vec;
3027 RTX_FRAME_RELATED_P (insn) = 1;
3028 vec = gen_rtvec (2,
3029 gen_rtx_SET (VOIDmode,
3030 gen_rtx_MEM (word_mode,
3031 stack_pointer_rtx),
3032 frame_pointer_rtx),
3033 gen_rtx_SET (VOIDmode,
3034 stack_pointer_rtx,
3035 gen_rtx_PLUS (word_mode,
3036 stack_pointer_rtx,
3037 delta)));
3038 REG_NOTES (insn)
3039 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3040 gen_rtx_SEQUENCE (VOIDmode, vec),
3041 REG_NOTES (insn));
3042 }
3043
3044 FRP (set_reg_plus_d (STACK_POINTER_REGNUM,
3045 STACK_POINTER_REGNUM,
3046 adjust2));
3047 }
3048 /* Prevent register spills from being scheduled before the
3049 stack pointer is raised. Necessary as we will be storing
3050 registers using the frame pointer as a base register, and
3051 we happen to set fp before raising sp. */
3052 emit_insn (gen_blockage ());
3053 }
3054 /* No frame pointer needed. */
3055 else
3056 {
3057 /* In some cases we can perform the first callee register save
3058 and allocating the stack frame at the same time. If so, just
3059 make a note of it and defer allocating the frame until saving
3060 the callee registers. */
3061 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3062 merge_sp_adjust_with_store = 1;
3063 /* Cannot optimize. Adjust the stack frame by actual_fsize
3064 bytes. */
3065 else
3066 FRP (set_reg_plus_d (STACK_POINTER_REGNUM,
3067 STACK_POINTER_REGNUM,
3068 actual_fsize));
3069 }
3070 }
3071
3072 /* Normal register save.
3073
3074 Do not save the frame pointer in the frame_pointer_needed case. It
3075 was done earlier. */
3076 if (frame_pointer_needed)
3077 {
3078 for (i = 18, offset = local_fsize; i >= 4; i--)
3079 if (regs_ever_live[i] && ! call_used_regs[i])
3080 {
3081 FRP (store_reg (i, offset, FRAME_POINTER_REGNUM));
3082 offset += UNITS_PER_WORD;
3083 gr_saved++;
3084 }
3085 /* Account for %r3 which is saved in a special place. */
3086 gr_saved++;
3087 }
3088 /* No frame pointer needed. */
3089 else
3090 {
3091 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
3092 if (regs_ever_live[i] && ! call_used_regs[i])
3093 {
3094 /* If merge_sp_adjust_with_store is nonzero, then we can
3095 optimize the first GR save. */
3096 if (merge_sp_adjust_with_store)
3097 {
3098 rtx delta = GEN_INT (-offset);
3099 merge_sp_adjust_with_store = 0;
3100 FRP (emit_insn (gen_post_store (stack_pointer_rtx,
3101 gen_rtx_REG (word_mode, i),
3102 delta)));
3103 }
3104 else
3105 FRP (store_reg (i, offset, STACK_POINTER_REGNUM));
3106 offset += UNITS_PER_WORD;
3107 gr_saved++;
3108 }
3109
3110 /* If we wanted to merge the SP adjustment with a GR save, but we never
3111 did any GR saves, then just emit the adjustment here. */
3112 if (merge_sp_adjust_with_store)
3113 FRP (set_reg_plus_d (STACK_POINTER_REGNUM,
3114 STACK_POINTER_REGNUM,
3115 actual_fsize));
3116 }
3117
3118 /* The hppa calling conventions say that %r19, the pic offset
3119 register, is saved at sp - 32 (in this function's frame)
3120 when generating PIC code. FIXME: What is the correct thing
3121 to do for functions which make no calls and allocate no
3122 frame? Do we need to allocate a frame, or can we just omit
3123 the save? For now we'll just omit the save. */
3124 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3125 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
3126
3127 /* Align pointer properly (doubleword boundary). */
3128 offset = (offset + 7) & ~7;
3129
3130 /* Floating point register store. */
3131 if (save_fregs)
3132 {
3133 /* First get the frame or stack pointer to the start of the FP register
3134 save area. */
3135 if (frame_pointer_needed)
3136 FRP (set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset));
3137 else
3138 FRP (set_reg_plus_d (1, STACK_POINTER_REGNUM, offset));
3139
3140 /* Now actually save the FP registers. */
3141 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3142 {
3143 if (regs_ever_live[i]
3144 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3145 {
3146 rtx addr, reg;
3147 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3148 reg = gen_rtx_REG (DFmode, i);
3149 FRP (emit_move_insn (addr, reg));
3150 fr_saved++;
3151 }
3152 }
3153 }
3154 }
3155
3156 /* ?!? Do we want frame notes in the epilogue yet? */
3157 #undef DO_FRAME_NOTES
3158 #define DO_FRAME_NOTES 0
3159 #undef FRP
3160 #define FRP(INSN) INSN
3161
3162 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3163 Handle case where DISP > 8k by using the add_high_const patterns. */
3164
3165 static rtx
3166 load_reg (reg, disp, base)
3167 int reg, disp, base;
3168 {
3169 rtx i, src, dest, basereg;
3170
3171 dest = gen_rtx_REG (word_mode, reg);
3172 basereg = gen_rtx_REG (Pmode, base);
3173 if (VAL_14_BITS_P (disp))
3174 {
3175 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3176 i = emit_move_insn (dest, src);
3177 }
3178 else
3179 {
3180 rtx delta = GEN_INT (disp);
3181 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3182 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3183 emit_move_insn (tmpreg, high);
3184 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3185 i = emit_move_insn (dest, src);
3186 }
3187 return i;
3188 }
3189
3190 void
3191 output_function_epilogue (file, size)
3192 FILE *file;
3193 int size ATTRIBUTE_UNUSED;
3194 {
3195 rtx insn = get_last_insn ();
3196
3197 /* hppa_expand_epilogue does the dirty work now. We just need
3198 to output the assembler directives which denote the end
3199 of a function.
3200
3201 To make debuggers happy, emit a nop if the epilogue was completely
3202 eliminated due to a volatile call as the last insn in the
3203 current function. That way the return address (in %r2) will
3204 always point to a valid instruction in the current function. */
3205
3206 /* Get the last real insn. */
3207 if (GET_CODE (insn) == NOTE)
3208 insn = prev_real_insn (insn);
3209
3210 /* If it is a sequence, then look inside. */
3211 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3212 insn = XVECEXP (PATTERN (insn), 0, 0);
3213
3214 /* If insn is a CALL_INSN, then it must be a call to a volatile
3215 function (otherwise there would be epilogue insns). */
3216 if (insn && GET_CODE (insn) == CALL_INSN)
3217 fputs ("\tnop\n", file);
3218
3219 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3220 }
3221
3222 void
3223 hppa_expand_epilogue ()
3224 {
3225 rtx tmpreg;
3226 int offset, i;
3227 int merge_sp_adjust_with_load = 0;
3228 int ret_off = 0;
3229
3230 /* We will use this often. */
3231 tmpreg = gen_rtx_REG (word_mode, 1);
3232
3233 /* Try to restore RP early to avoid load/use interlocks when
3234 RP gets used in the return (bv) instruction. This appears to still
3235 be necessary even when we schedule the prologue and epilogue. */
3236 if (regs_ever_live[2])
3237 {
3238 ret_off = TARGET_64BIT ? -16 : -20;
3239 if (frame_pointer_needed)
3240 {
3241 FRP (load_reg (2, ret_off, FRAME_POINTER_REGNUM));
3242 ret_off = 0;
3243 }
3244 else
3245 {
3246 /* No frame pointer, and stack is smaller than 8k. */
3247 if (VAL_14_BITS_P (ret_off - actual_fsize))
3248 {
3249 FRP (load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM));
3250 ret_off = 0;
3251 }
3252 }
3253 }
3254
3255 /* General register restores. */
3256 if (frame_pointer_needed)
3257 {
3258 for (i = 18, offset = local_fsize; i >= 4; i--)
3259 if (regs_ever_live[i] && ! call_used_regs[i])
3260 {
3261 FRP (load_reg (i, offset, FRAME_POINTER_REGNUM));
3262 offset += UNITS_PER_WORD;
3263 }
3264 }
3265 else
3266 {
3267 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
3268 {
3269 if (regs_ever_live[i] && ! call_used_regs[i])
3270 {
3271 /* Only for the first load.
3272 merge_sp_adjust_with_load holds the register load
3273 with which we will merge the sp adjustment. */
3274 if (merge_sp_adjust_with_load == 0
3275 && local_fsize == 0
3276 && VAL_14_BITS_P (-actual_fsize))
3277 merge_sp_adjust_with_load = i;
3278 else
3279 FRP (load_reg (i, offset, STACK_POINTER_REGNUM));
3280 offset += UNITS_PER_WORD;
3281 }
3282 }
3283 }
3284
3285 /* Align pointer properly (doubleword boundary). */
3286 offset = (offset + 7) & ~7;
3287
3288 /* FP register restores. */
3289 if (save_fregs)
3290 {
3291 /* Adjust the register to index off of. */
3292 if (frame_pointer_needed)
3293 FRP (set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset));
3294 else
3295 FRP (set_reg_plus_d (1, STACK_POINTER_REGNUM, offset));
3296
3297 /* Actually do the restores now. */
3298 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3299 if (regs_ever_live[i]
3300 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3301 {
3302 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3303 rtx dest = gen_rtx_REG (DFmode, i);
3304 FRP (emit_move_insn (dest, src));
3305 }
3306 }
3307
3308 /* Emit a blockage insn here to keep these insns from being moved to
3309 an earlier spot in the epilogue, or into the main instruction stream.
3310
3311 This is necessary as we must not cut the stack back before all the
3312 restores are finished. */
3313 emit_insn (gen_blockage ());
3314
3315 /* Reset stack pointer (and possibly frame pointer). The stack
3316 pointer is initially set to fp + 64 to avoid a race condition. */
3317 if (frame_pointer_needed)
3318 {
3319 rtx delta = GEN_INT (-64);
3320 FRP (set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64));
3321 FRP (emit_insn (gen_pre_load (frame_pointer_rtx,
3322 stack_pointer_rtx,
3323 delta)));
3324 }
3325 /* If we were deferring a callee register restore, do it now. */
3326 else if (merge_sp_adjust_with_load)
3327 {
3328 rtx delta = GEN_INT (-actual_fsize);
3329 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
3330 FRP (emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta)));
3331 }
3332 else if (actual_fsize != 0)
3333 FRP (set_reg_plus_d (STACK_POINTER_REGNUM,
3334 STACK_POINTER_REGNUM,
3335 - actual_fsize));
3336
3337 /* If we haven't restored %r2 yet (no frame pointer, and a stack
3338 frame greater than 8k), do so now. */
3339 if (ret_off != 0)
3340 FRP (load_reg (2, ret_off, STACK_POINTER_REGNUM));
3341 }
3342
3343 /* Set up a callee saved register for the pic offset table register. */
3344 void
3345 hppa_init_pic_save ()
3346 {
3347 rtx insn, picreg;
3348
3349 picreg = gen_rtx_REG (word_mode, PIC_OFFSET_TABLE_REGNUM);
3350 PIC_OFFSET_TABLE_SAVE_RTX = gen_reg_rtx (Pmode);
3351 RTX_UNCHANGING_P (PIC_OFFSET_TABLE_SAVE_RTX) = 1;
3352 insn = gen_rtx_SET (VOIDmode, PIC_OFFSET_TABLE_SAVE_RTX, picreg);
3353
3354 /* Emit the insn at the beginning of the function after the prologue. */
3355 if (tail_recursion_reentry)
3356 emit_insn_before (insn, tail_recursion_reentry);
3357 else
3358 /* We must have been called via PROFILE_HOOK. */
3359 emit_insn (insn);
3360 }
3361
3362 void
3363 hppa_profile_hook (label_no)
3364 int label_no ATTRIBUTE_UNUSED;
3365 {
3366 rtx call_insn;
3367
3368 /* No profiling for inline functions. We don't want extra calls to
3369 _mcount when the inline function is expanded. Even if that made
3370 sense, it wouldn't work here as there is no function label for
3371 the inline expansion. */
3372 if (DECL_INLINE (cfun->decl))
3373 return;
3374
3375 if (TARGET_64BIT)
3376 emit_move_insn (arg_pointer_rtx,
3377 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
3378 GEN_INT (64)));
3379
3380 if (flag_pic && PIC_OFFSET_TABLE_SAVE_RTX == NULL_RTX)
3381 hppa_init_pic_save ();
3382
3383 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
3384
3385 #ifndef NO_PROFILE_COUNTERS
3386 {
3387 rtx count_label_rtx, addr, r24;
3388 char label_name[16];
3389
3390 ASM_GENERATE_INTERNAL_LABEL (label_name, "LP", label_no);
3391 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (label_name));
3392
3393 if (flag_pic)
3394 {
3395 rtx tmpreg;
3396
3397 current_function_uses_pic_offset_table = 1;
3398 tmpreg = gen_rtx_REG (Pmode, 1);
3399 emit_move_insn (tmpreg,
3400 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
3401 gen_rtx_HIGH (Pmode, count_label_rtx)));
3402 addr = gen_rtx_MEM (Pmode,
3403 gen_rtx_LO_SUM (Pmode, tmpreg, count_label_rtx));
3404 }
3405 else
3406 {
3407 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3408 emit_move_insn (tmpreg, gen_rtx_HIGH (Pmode, count_label_rtx));
3409 addr = gen_rtx_LO_SUM (Pmode, tmpreg, count_label_rtx);
3410 }
3411 r24 = gen_rtx_REG (Pmode, 24);
3412 emit_move_insn (r24, addr);
3413
3414 /* %r25 is set from within the output pattern. */
3415 call_insn =
3416 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3417 GEN_INT (TARGET_64BIT ? 24 : 12),
3418 XEXP (DECL_RTL (cfun->decl), 0)));
3419
3420 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
3421 }
3422 #else
3423 /* %r25 is set from within the output pattern. */
3424 call_insn =
3425 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3426 GEN_INT (TARGET_64BIT ? 16 : 8),
3427 XEXP (DECL_RTL (cfun->decl), 0)));
3428 #endif
3429
3430 /* Indicate the _mcount call cannot throw, nor will it execute a
3431 non-local goto. */
3432 REG_NOTES (call_insn)
3433 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
3434
3435 if (flag_pic)
3436 {
3437 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
3438 if (TARGET_64BIT)
3439 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
3440
3441 emit_move_insn (pic_offset_table_rtx, PIC_OFFSET_TABLE_SAVE_RTX);
3442 }
3443 }
3444
3445 /* Fetch the return address for the frame COUNT steps up from
3446 the current frame, after the prologue. FRAMEADDR is the
3447 frame pointer of the COUNT frame.
3448
3449 We want to ignore any export stub remnants here.
3450
3451 The value returned is used in two different ways:
3452
3453 1. To find a function's caller.
3454
3455 2. To change the return address for a function.
3456
3457 This function handles most instances of case 1; however, it will
3458 fail if there are two levels of stubs to execute on the return
3459 path. The only way I believe that can happen is if the return value
3460 needs a parameter relocation, which never happens for C code.
3461
3462 This function handles most instances of case 2; however, it will
3463 fail if we did not originally have stub code on the return path
3464 but will need code on the new return path. This can happen if
3465 the caller & callee are both in the main program, but the new
3466 return location is in a shared library.
3467
3468 To handle this correctly we need to set the return pointer at
3469 frame-20 to point to a return stub, and frame-24 to point to the
3470 location we wish to return to. */
3471
3472 rtx
3473 return_addr_rtx (count, frameaddr)
3474 int count ATTRIBUTE_UNUSED;
3475 rtx frameaddr;
3476 {
3477 rtx label;
3478 rtx saved_rp;
3479 rtx ins;
3480
3481 if (TARGET_64BIT)
3482 return gen_rtx_MEM (Pmode, plus_constant (frameaddr, -16));
3483
3484 if (TARGET_NO_SPACE_REGS)
3485 return gen_rtx_MEM (Pmode, plus_constant (frameaddr, -20));
3486
3487 /* First, we start off with the normal return address pointer from
3488 -20[frameaddr]. */
3489
3490 saved_rp = gen_reg_rtx (Pmode);
3491 emit_move_insn (saved_rp, plus_constant (frameaddr, -20));
3492
3493 /* Get pointer to the instruction stream. We have to mask out the
3494 privilege level from the two low order bits of the return address
3495 pointer here so that ins will point to the start of the first
3496 instruction that would have been executed if we returned. */
3497 ins = copy_to_reg (gen_rtx_AND (Pmode,
3498 copy_to_reg (gen_rtx_MEM (Pmode, saved_rp)),
3499 MASK_RETURN_ADDR));
3500 label = gen_label_rtx ();
3501
3502 /* Check the instruction stream at the normal return address for the
3503 export stub:
3504
3505 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3506 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3507 0x00011820 | stub+16: mtsp r1,sr0
3508 0xe0400002 | stub+20: be,n 0(sr0,rp)
3509
3510 If it is an export stub, then our return address is really in
3511 -24[frameaddr]. */
3512
3513 emit_cmp_insn (gen_rtx_MEM (SImode, ins),
3514 GEN_INT (0x4bc23fd1),
3515 NE, NULL_RTX, SImode, 1, 0);
3516 emit_jump_insn (gen_bne (label));
3517
3518 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
3519 GEN_INT (0x004010a1),
3520 NE, NULL_RTX, SImode, 1, 0);
3521 emit_jump_insn (gen_bne (label));
3522
3523 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
3524 GEN_INT (0x00011820),
3525 NE, NULL_RTX, SImode, 1, 0);
3526 emit_jump_insn (gen_bne (label));
3527
3528 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
3529 GEN_INT (0xe0400002),
3530 NE, NULL_RTX, SImode, 1, 0);
3531
3532 /* If there is no export stub then just use our initial guess of
3533 -20[frameaddr]. */
3534
3535 emit_jump_insn (gen_bne (label));
3536
3537 /* Here we know that our return address pointer points to an export
3538 stub. We don't want to return the address of the export stub,
3539 but rather the return address that leads back into user code.
3540 That return address is stored at -24[frameaddr]. */
3541
3542 emit_move_insn (saved_rp, plus_constant (frameaddr, -24));
3543
3544 emit_label (label);
3545 return gen_rtx_MEM (Pmode, memory_address (Pmode, saved_rp));
3546 }
3547
3548 /* This is only valid once reload has completed because it depends on
3549 knowing exactly how much (if any) frame there is and...
3550
3551 It's only valid if there is no frame marker to de-allocate and...
3552
3553 It's only valid if %r2 hasn't been saved into the caller's frame
3554 (we're not profiling and %r2 isn't live anywhere). */
3555 int
3556 hppa_can_use_return_insn_p ()
3557 {
3558 return (reload_completed
3559 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3560 && ! regs_ever_live[2]
3561 && ! frame_pointer_needed);
3562 }
3563
3564 void
3565 emit_bcond_fp (code, operand0)
3566 enum rtx_code code;
3567 rtx operand0;
3568 {
3569 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
3570 gen_rtx_IF_THEN_ELSE (VOIDmode,
3571 gen_rtx_fmt_ee (code,
3572 VOIDmode,
3573 gen_rtx_REG (CCFPmode, 0),
3574 const0_rtx),
3575 gen_rtx_LABEL_REF (VOIDmode, operand0),
3576 pc_rtx)));
3577
3578 }
3579
3580 rtx
3581 gen_cmp_fp (code, operand0, operand1)
3582 enum rtx_code code;
3583 rtx operand0, operand1;
3584 {
3585 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
3586 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
3587 }
3588
3589 /* Adjust the cost of a scheduling dependency. Return the new cost of
3590 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
3591
3592 int
3593 pa_adjust_cost (insn, link, dep_insn, cost)
3594 rtx insn;
3595 rtx link;
3596 rtx dep_insn;
3597 int cost;
3598 {
3599 enum attr_type attr_type;
3600
3601 /* Don't adjust costs for a pa8000 chip. */
3602 if (pa_cpu >= PROCESSOR_8000)
3603 return cost;
3604
3605 if (! recog_memoized (insn))
3606 return 0;
3607
3608 attr_type = get_attr_type (insn);
3609
3610 if (REG_NOTE_KIND (link) == 0)
3611 {
3612 /* Data dependency; DEP_INSN writes a register that INSN reads some
3613 cycles later. */
3614
3615 if (attr_type == TYPE_FPSTORE)
3616 {
3617 rtx pat = PATTERN (insn);
3618 rtx dep_pat = PATTERN (dep_insn);
3619 if (GET_CODE (pat) == PARALLEL)
3620 {
3621 /* This happens for the fstXs,mb patterns. */
3622 pat = XVECEXP (pat, 0, 0);
3623 }
3624 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3625 /* If this happens, we have to extend this to schedule
3626 optimally. Return 0 for now. */
3627 return 0;
3628
3629 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
3630 {
3631 if (! recog_memoized (dep_insn))
3632 return 0;
3633 /* DEP_INSN is writing its result to the register
3634 being stored in the fpstore INSN. */
3635 switch (get_attr_type (dep_insn))
3636 {
3637 case TYPE_FPLOAD:
3638 /* This costs 3 cycles, not 2 as the md says for the
3639 700 and 7100. */
3640 return cost + 1;
3641
3642 case TYPE_FPALU:
3643 case TYPE_FPMULSGL:
3644 case TYPE_FPMULDBL:
3645 case TYPE_FPDIVSGL:
3646 case TYPE_FPDIVDBL:
3647 case TYPE_FPSQRTSGL:
3648 case TYPE_FPSQRTDBL:
3649 /* In these important cases, we save one cycle compared to
3650 when flop instructions feed each other. */
3651 return cost - 1;
3652
3653 default:
3654 return cost;
3655 }
3656 }
3657 }
3658
3659 /* For other data dependencies, the default cost specified in the
3660 md is correct. */
3661 return cost;
3662 }
3663 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3664 {
3665 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3666 cycles later. */
3667
3668 if (attr_type == TYPE_FPLOAD)
3669 {
3670 rtx pat = PATTERN (insn);
3671 rtx dep_pat = PATTERN (dep_insn);
3672 if (GET_CODE (pat) == PARALLEL)
3673 {
3674 /* This happens for the fldXs,mb patterns. */
3675 pat = XVECEXP (pat, 0, 0);
3676 }
3677 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3678 /* If this happens, we have to extend this to schedule
3679 optimally. Return 0 for now. */
3680 return 0;
3681
3682 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3683 {
3684 if (! recog_memoized (dep_insn))
3685 return 0;
3686 switch (get_attr_type (dep_insn))
3687 {
3688 case TYPE_FPALU:
3689 case TYPE_FPMULSGL:
3690 case TYPE_FPMULDBL:
3691 case TYPE_FPDIVSGL:
3692 case TYPE_FPDIVDBL:
3693 case TYPE_FPSQRTSGL:
3694 case TYPE_FPSQRTDBL:
3695 /* A fpload can't be issued until one cycle before a
3696 preceding arithmetic operation has finished if
3697 the target of the fpload is any of the sources
3698 (or destination) of the arithmetic operation. */
3699 return cost - 1;
3700
3701 default:
3702 return 0;
3703 }
3704 }
3705 }
3706 else if (attr_type == TYPE_FPALU)
3707 {
3708 rtx pat = PATTERN (insn);
3709 rtx dep_pat = PATTERN (dep_insn);
3710 if (GET_CODE (pat) == PARALLEL)
3711 {
3712 /* This happens for the fldXs,mb patterns. */
3713 pat = XVECEXP (pat, 0, 0);
3714 }
3715 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3716 /* If this happens, we have to extend this to schedule
3717 optimally. Return 0 for now. */
3718 return 0;
3719
3720 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3721 {
3722 if (! recog_memoized (dep_insn))
3723 return 0;
3724 switch (get_attr_type (dep_insn))
3725 {
3726 case TYPE_FPDIVSGL:
3727 case TYPE_FPDIVDBL:
3728 case TYPE_FPSQRTSGL:
3729 case TYPE_FPSQRTDBL:
3730 /* An ALU flop can't be issued until two cycles before a
3731 preceding divide or sqrt operation has finished if
3732 the target of the ALU flop is any of the sources
3733 (or destination) of the divide or sqrt operation. */
3734 return cost - 2;
3735
3736 default:
3737 return 0;
3738 }
3739 }
3740 }
3741
3742 /* For other anti dependencies, the cost is 0. */
3743 return 0;
3744 }
3745 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
3746 {
3747 /* Output dependency; DEP_INSN writes a register that INSN writes some
3748 cycles later. */
3749 if (attr_type == TYPE_FPLOAD)
3750 {
3751 rtx pat = PATTERN (insn);
3752 rtx dep_pat = PATTERN (dep_insn);
3753 if (GET_CODE (pat) == PARALLEL)
3754 {
3755 /* This happens for the fldXs,mb patterns. */
3756 pat = XVECEXP (pat, 0, 0);
3757 }
3758 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3759 /* If this happens, we have to extend this to schedule
3760 optimally. Return 0 for now. */
3761 return 0;
3762
3763 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3764 {
3765 if (! recog_memoized (dep_insn))
3766 return 0;
3767 switch (get_attr_type (dep_insn))
3768 {
3769 case TYPE_FPALU:
3770 case TYPE_FPMULSGL:
3771 case TYPE_FPMULDBL:
3772 case TYPE_FPDIVSGL:
3773 case TYPE_FPDIVDBL:
3774 case TYPE_FPSQRTSGL:
3775 case TYPE_FPSQRTDBL:
3776 /* A fpload can't be issued until one cycle before a
3777 preceding arithmetic operation has finished if
3778 the target of the fpload is the destination of the
3779 arithmetic operation. */
3780 return cost - 1;
3781
3782 default:
3783 return 0;
3784 }
3785 }
3786 }
3787 else if (attr_type == TYPE_FPALU)
3788 {
3789 rtx pat = PATTERN (insn);
3790 rtx dep_pat = PATTERN (dep_insn);
3791 if (GET_CODE (pat) == PARALLEL)
3792 {
3793 /* This happens for the fldXs,mb patterns. */
3794 pat = XVECEXP (pat, 0, 0);
3795 }
3796 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3797 /* If this happens, we have to extend this to schedule
3798 optimally. Return 0 for now. */
3799 return 0;
3800
3801 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3802 {
3803 if (! recog_memoized (dep_insn))
3804 return 0;
3805 switch (get_attr_type (dep_insn))
3806 {
3807 case TYPE_FPDIVSGL:
3808 case TYPE_FPDIVDBL:
3809 case TYPE_FPSQRTSGL:
3810 case TYPE_FPSQRTDBL:
3811 /* An ALU flop can't be issued until two cycles before a
3812 preceding divide or sqrt operation has finished if
3813 the target of the ALU flop is also the target of
3814 the divide or sqrt operation. */
3815 return cost - 2;
3816
3817 default:
3818 return 0;
3819 }
3820 }
3821 }
3822
3823 /* For other output dependencies, the cost is 0. */
3824 return 0;
3825 }
3826 else
3827 abort ();
3828 }
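/* A worked example (the insn pairing is hypothetical): on the 700/7100,
   an fpload whose result feeds an fpstore costs one cycle more than the
   md's value of 2, hence the "cost + 1" above; had the producer been an
   fpalu insn instead, a cycle would be saved ("cost - 1").  */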
3829
3830 /* Return any length adjustment needed by INSN which already has its length
3831 computed as LENGTH. Return zero if no adjustment is necessary.
3832
3833 For the PA: function calls, millicode calls, and backwards short
3834 conditional branches with unfilled delay slots need an adjustment of +4
3835 bytes (to account for the NOP which will be inserted into the instruction stream).
3836
3837 Also compute the length of an inline block move here as it is too
3838 complicated to express as a length attribute in pa.md. */
3839 int
3840 pa_adjust_insn_length (insn, length)
3841 rtx insn;
3842 int length;
3843 {
3844 rtx pat = PATTERN (insn);
3845
3846 /* Call insns which are *not* indirect and have unfilled delay slots. */
3847 if (GET_CODE (insn) == CALL_INSN)
3848 {
3850 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
3851 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
3852 return 4;
3853 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
3854 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
3855 == SYMBOL_REF)
3856 return 4;
3857 else
3858 return 0;
3859 }
3860 /* Jumps inside switch tables which have unfilled delay slots
3861 also need adjustment. */
3862 else if (GET_CODE (insn) == JUMP_INSN
3863 && simplejump_p (insn)
3864 && GET_MODE (insn) == SImode)
3865 return 4;
3866 /* Millicode insn with an unfilled delay slot. */
3867 else if (GET_CODE (insn) == INSN
3868 && GET_CODE (pat) != SEQUENCE
3869 && GET_CODE (pat) != USE
3870 && GET_CODE (pat) != CLOBBER
3871 && get_attr_type (insn) == TYPE_MILLI)
3872 return 4;
3873 /* Block move pattern. */
3874 else if (GET_CODE (insn) == INSN
3875 && GET_CODE (pat) == PARALLEL
3876 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
3877 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
3878 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
3879 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
3880 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
3881 return compute_movstrsi_length (insn) - 4;
3882 /* Conditional branch with an unfilled delay slot. */
3883 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
3884 {
3885 /* Adjust a short backwards conditional with an unfilled delay slot. */
3886 if (GET_CODE (pat) == SET
3887 && length == 4
3888 && ! forward_branch_p (insn))
3889 return 4;
3890 else if (GET_CODE (pat) == PARALLEL
3891 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
3892 && length == 4)
3893 return 4;
3894 /* Adjust dbra insn with short backwards conditional branch with
3895 unfilled delay slot -- only for the case where the counter is in a
3896 general register. */
3897 else if (GET_CODE (pat) == PARALLEL
3898 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
3899 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
3900 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
3901 && length == 4
3902 && ! forward_branch_p (insn))
3903 return 4;
3904 else
3905 return 0;
3906 }
3907 return 0;
3908 }
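/* For example, a short backwards conditional branch such as
   "comb,= %r4,%r5,L$0012" (mnemonic illustrative only) with an unfilled
   delay slot is reported as needing 4 extra bytes, covering the nop
   which will occupy the slot.  */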
3909
3910 /* Print operand X (an rtx) in assembler syntax to file FILE.
3911 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
3912 For `%' followed by punctuation, CODE is the punctuation and X is null. */
3913
3914 void
3915 print_operand (file, x, code)
3916 FILE *file;
3917 rtx x;
3918 int code;
3919 {
3920 switch (code)
3921 {
3922 case '#':
3923 /* Output a 'nop' if there's nothing for the delay slot. */
3924 if (dbr_sequence_length () == 0)
3925 fputs ("\n\tnop", file);
3926 return;
3927 case '*':
3928 /* Output a nullification completer if there's nothing for the
3929 delay slot or nullification is requested. */
3930 if (dbr_sequence_length () == 0 ||
3931 (final_sequence &&
3932 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
3933 fputs (",n", file);
3934 return;
3935 case 'R':
3936 /* Print out the second register name of a register pair.
3937 I.e., R (6) => 7. */
3938 fputs (reg_names[REGNO (x)+1], file);
3939 return;
3940 case 'r':
3941 /* A register or zero. */
3942 if (x == const0_rtx
3943 || (x == CONST0_RTX (DFmode))
3944 || (x == CONST0_RTX (SFmode)))
3945 {
3946 fputs ("%r0", file);
3947 return;
3948 }
3949 else
3950 break;
3951 case 'f':
3952 /* A register or zero (floating point). */
3953 if (x == const0_rtx
3954 || (x == CONST0_RTX (DFmode))
3955 || (x == CONST0_RTX (SFmode)))
3956 {
3957 fputs ("%fr0", file);
3958 return;
3959 }
3960 else
3961 break;
3962 case 'A':
3963 {
3964 rtx xoperands[2];
3965
3966 xoperands[0] = XEXP (XEXP (x, 0), 0);
3967 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
3968 output_global_address (file, xoperands[1], 0);
3969 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
3970 return;
3971 }
3972
3973 case 'C': /* Plain (C)ondition */
3974 case 'X':
3975 switch (GET_CODE (x))
3976 {
3977 case EQ:
3978 fputs ("=", file); break;
3979 case NE:
3980 fputs ("<>", file); break;
3981 case GT:
3982 fputs (">", file); break;
3983 case GE:
3984 fputs (">=", file); break;
3985 case GEU:
3986 fputs (">>=", file); break;
3987 case GTU:
3988 fputs (">>", file); break;
3989 case LT:
3990 fputs ("<", file); break;
3991 case LE:
3992 fputs ("<=", file); break;
3993 case LEU:
3994 fputs ("<<=", file); break;
3995 case LTU:
3996 fputs ("<<", file); break;
3997 default:
3998 abort ();
3999 }
4000 return;
4001 case 'N': /* Condition, (N)egated */
4002 switch (GET_CODE (x))
4003 {
4004 case EQ:
4005 fputs ("<>", file); break;
4006 case NE:
4007 fputs ("=", file); break;
4008 case GT:
4009 fputs ("<=", file); break;
4010 case GE:
4011 fputs ("<", file); break;
4012 case GEU:
4013 fputs ("<<", file); break;
4014 case GTU:
4015 fputs ("<<=", file); break;
4016 case LT:
4017 fputs (">=", file); break;
4018 case LE:
4019 fputs (">", file); break;
4020 case LEU:
4021 fputs (">>", file); break;
4022 case LTU:
4023 fputs (">>=", file); break;
4024 default:
4025 abort ();
4026 }
4027 return;
4028 /* For floating point comparisons. Note that the output predicates are the
4029 complement of the desired mode. */
4030 case 'Y':
4031 switch (GET_CODE (x))
4032 {
4033 case EQ:
4034 fputs ("!=", file); break;
4035 case NE:
4036 fputs ("=", file); break;
4037 case GT:
4038 fputs ("!>", file); break;
4039 case GE:
4040 fputs ("!>=", file); break;
4041 case LT:
4042 fputs ("!<", file); break;
4043 case LE:
4044 fputs ("!<=", file); break;
4045 case LTGT:
4046 fputs ("!<>", file); break;
4047 case UNLE:
4048 fputs (">", file); break;
4049 case UNLT:
4050 fputs (">=", file); break;
4051 case UNGE:
4052 fputs ("<", file); break;
4053 case UNGT:
4054 fputs ("<=", file); break;
4055 case UNEQ:
4056 fputs ("<>", file); break;
4057 case UNORDERED:
4058 fputs ("<=>", file); break;
4059 case ORDERED:
4060 fputs ("!<=>", file); break;
4061 default:
4062 abort ();
4063 }
4064 return;
4065 case 'S': /* Condition, operands are (S)wapped. */
4066 switch (GET_CODE (x))
4067 {
4068 case EQ:
4069 fputs ("=", file); break;
4070 case NE:
4071 fputs ("<>", file); break;
4072 case GT:
4073 fputs ("<", file); break;
4074 case GE:
4075 fputs ("<=", file); break;
4076 case GEU:
4077 fputs ("<<=", file); break;
4078 case GTU:
4079 fputs ("<<", file); break;
4080 case LT:
4081 fputs (">", file); break;
4082 case LE:
4083 fputs (">=", file); break;
4084 case LEU:
4085 fputs (">>=", file); break;
4086 case LTU:
4087 fputs (">>", file); break;
4088 default:
4089 abort ();
4090 }
4091 return;
4092 case 'B': /* Condition, (B)oth swapped and negate. */
4093 switch (GET_CODE (x))
4094 {
4095 case EQ:
4096 fputs ("<>", file); break;
4097 case NE:
4098 fputs ("=", file); break;
4099 case GT:
4100 fputs (">=", file); break;
4101 case GE:
4102 fputs (">", file); break;
4103 case GEU:
4104 fputs (">>", file); break;
4105 case GTU:
4106 fputs (">>=", file); break;
4107 case LT:
4108 fputs ("<=", file); break;
4109 case LE:
4110 fputs ("<", file); break;
4111 case LEU:
4112 fputs ("<<", file); break;
4113 case LTU:
4114 fputs ("<<=", file); break;
4115 default:
4116 abort ();
4117 }
4118 return;
4119 case 'k':
4120 if (GET_CODE (x) == CONST_INT)
4121 {
4122 fprintf (file, "%d", ~INTVAL (x));
4123 return;
4124 }
4125 abort ();
4126 case 'Q':
4127 if (GET_CODE (x) == CONST_INT)
4128 {
4129 fprintf (file, "%d", 64 - (INTVAL (x) & 63));
4130 return;
4131 }
4132 abort ();
4133 case 'L':
4134 if (GET_CODE (x) == CONST_INT)
4135 {
4136 fprintf (file, "%d", 32 - (INTVAL (x) & 31));
4137 return;
4138 }
4139 abort ();
4140 case 'O':
4141 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
4142 {
4143 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4144 return;
4145 }
4146 abort ();
4147 case 'p':
4148 if (GET_CODE (x) == CONST_INT)
4149 {
4150 fprintf (file, "%d", 63 - (INTVAL (x) & 63));
4151 return;
4152 }
4153 abort ();
4154 case 'P':
4155 if (GET_CODE (x) == CONST_INT)
4156 {
4157 fprintf (file, "%d", 31 - (INTVAL (x) & 31));
4158 return;
4159 }
4160 abort ();
4161 case 'I':
4162 if (GET_CODE (x) == CONST_INT)
4163 fputs ("i", file);
4164 return;
4165 case 'M':
4166 case 'F':
4167 switch (GET_CODE (XEXP (x, 0)))
4168 {
4169 case PRE_DEC:
4170 case PRE_INC:
4171 if (ASSEMBLER_DIALECT == 0)
4172 fputs ("s,mb", file);
4173 else
4174 fputs (",mb", file);
4175 break;
4176 case POST_DEC:
4177 case POST_INC:
4178 if (ASSEMBLER_DIALECT == 0)
4179 fputs ("s,ma", file);
4180 else
4181 fputs (",ma", file);
4182 break;
4183 case PLUS:
4184 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4185 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4186 {
4187 if (ASSEMBLER_DIALECT == 0)
4188 fputs ("x,s", file);
4189 else
4190 fputs (",s", file);
4191 }
4192 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4193 fputs ("s", file);
4194 break;
4195 default:
4196 if (code == 'F' && ASSEMBLER_DIALECT == 0)
4197 fputs ("s", file);
4198 break;
4199 }
4200 return;
4201 case 'G':
4202 output_global_address (file, x, 0);
4203 return;
4204 case 'H':
4205 output_global_address (file, x, 1);
4206 return;
4207 case 0: /* Don't do anything special */
4208 break;
4209 case 'Z':
4210 {
4211 unsigned op[3];
4212 compute_zdepwi_operands (INTVAL (x), op);
4213 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4214 return;
4215 }
4216 case 'z':
4217 {
4218 unsigned op[3];
4219 compute_zdepdi_operands (INTVAL (x), op);
4220 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4221 return;
4222 }
4223 case 'c':
4224 /* We can get here from a .vtable_inherit due to our
4225 CONSTANT_ADDRESS_P rejecting perfectly good constant
4226 addresses. */
4227 break;
4228 default:
4229 abort ();
4230 }
4231 if (GET_CODE (x) == REG)
4232 {
4233 fputs (reg_names [REGNO (x)], file);
4234 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
4235 {
4236 fputs ("R", file);
4237 return;
4238 }
4239 if (FP_REG_P (x)
4240 && GET_MODE_SIZE (GET_MODE (x)) <= 4
4241 && (REGNO (x) & 1) == 0)
4242 fputs ("L", file);
4243 }
4244 else if (GET_CODE (x) == MEM)
4245 {
4246 int size = GET_MODE_SIZE (GET_MODE (x));
4247 rtx base = NULL_RTX;
4248 switch (GET_CODE (XEXP (x, 0)))
4249 {
4250 case PRE_DEC:
4251 case POST_DEC:
4252 base = XEXP (XEXP (x, 0), 0);
4253 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
4254 break;
4255 case PRE_INC:
4256 case POST_INC:
4257 base = XEXP (XEXP (x, 0), 0);
4258 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
4259 break;
4260 default:
4261 if (GET_CODE (XEXP (x, 0)) == PLUS
4262 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
4263 fprintf (file, "%s(%s)",
4264 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
4265 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
4266 else if (GET_CODE (XEXP (x, 0)) == PLUS
4267 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4268 fprintf (file, "%s(%s)",
4269 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
4270 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
4271 else
4272 output_address (XEXP (x, 0));
4273 break;
4274 }
4275 }
4276 else
4277 output_addr_const (file, x);
4278 }
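/* Examples of the numeric code letters above, for a hypothetical
   operand 0 of (const_int 8): "%O0" prints "3" (exact_log2),
   "%Q0" prints "56" (64 - (8 & 63)), and "%L0" prints "24"
   (32 - (8 & 31)).  */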
4279
4280 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
4281
4282 void
4283 output_global_address (file, x, round_constant)
4284 FILE *file;
4285 rtx x;
4286 int round_constant;
4287 {
4289 /* Imagine (high (const (plus ...))). */
4290 if (GET_CODE (x) == HIGH)
4291 x = XEXP (x, 0);
4292
4293 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
4294 assemble_name (file, XSTR (x, 0));
4295 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
4296 {
4297 assemble_name (file, XSTR (x, 0));
4298 fputs ("-$global$", file);
4299 }
4300 else if (GET_CODE (x) == CONST)
4301 {
4302 const char *sep = "";
4303 int offset = 0; /* assembler wants -$global$ at end */
4304 rtx base = NULL_RTX;
4305
4306 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4307 {
4308 base = XEXP (XEXP (x, 0), 0);
4309 output_addr_const (file, base);
4310 }
4311 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
4312 offset = INTVAL (XEXP (XEXP (x, 0), 0));
4313 else abort ();
4314
4315 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
4316 {
4317 base = XEXP (XEXP (x, 0), 1);
4318 output_addr_const (file, base);
4319 }
4320 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
4321 offset = INTVAL (XEXP (XEXP (x, 0), 1));
4322 else abort ();
4323
4324 /* How bogus. The compiler is apparently responsible for
4325 rounding the constant if it uses an LR field selector.
4326
4327 The linker and/or assembler seem a better place since
4328 they have to do this kind of thing already.
4329
4330 If we fail to do this, HP's optimizing linker may eliminate
4331 an addil, but not update the ldw/stw/ldo instruction that
4332 uses the result of the addil. */
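/* For example, an offset of 0x2345 is rounded here to 0x2000,
   since (0x2345 + 0x1000) & ~0x1fff == 0x2000.  */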
4333 if (round_constant)
4334 offset = ((offset + 0x1000) & ~0x1fff);
4335
4336 if (GET_CODE (XEXP (x, 0)) == PLUS)
4337 {
4338 if (offset < 0)
4339 {
4340 offset = -offset;
4341 sep = "-";
4342 }
4343 else
4344 sep = "+";
4345 }
4346 else if (GET_CODE (XEXP (x, 0)) == MINUS
4347 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4348 sep = "-";
4349 else abort ();
4350
4351 if (!read_only_operand (base, VOIDmode) && !flag_pic)
4352 fputs ("-$global$", file);
4353 if (offset)
4354 fprintf (file, "%s%d", sep, offset);
4355 }
4356 else
4357 output_addr_const (file, x);
4358 }
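/* Sample outputs (symbol name hypothetical): a writable SYMBOL_REF "foo"
   prints as "foo-$global$" when not compiling PIC, and
   (const (plus (symbol_ref "foo") (const_int 4))) prints as
   "foo-$global$+4".  */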
4359
4360 void
4361 output_deferred_plabels (file)
4362 FILE *file;
4363 {
4364 int i;
4365 /* If we have deferred plabels, then we need to switch into the data
4366 section and align it to a 4 byte boundary before we output the
4367 deferred plabels. */
4368 if (n_deferred_plabels)
4369 {
4370 data_section ();
4371 ASM_OUTPUT_ALIGN (file, 2);
4372 }
4373
4374 /* Now output the deferred plabels. */
4375 for (i = 0; i < n_deferred_plabels; i++)
4376 {
4377 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
4378 assemble_integer (gen_rtx_SYMBOL_REF (VOIDmode,
4379 deferred_plabels[i].name), 4, 1);
4380 }
4381 }
4382
4383 /* HP's millicode routines mean something special to the assembler.
4384 Keep track of which ones we have used. */
4385
4386 enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
4387 static void import_milli PARAMS ((enum millicodes));
4388 static char imported[(int)end1000];
4389 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
4390 static char import_string[] = ".IMPORT $$....,MILLICODE";
4391 #define MILLI_START 10
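/* import_milli works by string surgery: MILLI_START (10) is the index of
   the "...." placeholder in import_string, so import_milli (mulI)
   overwrites it with "mulI" and emits ".IMPORT $$mulI,MILLICODE",
   at most once per millicode routine per compilation.  */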
4392
4393 static void
4394 import_milli (code)
4395 enum millicodes code;
4396 {
4397 char str[sizeof (import_string)];
4398
4399 if (!imported[(int)code])
4400 {
4401 imported[(int)code] = 1;
4402 strcpy (str, import_string);
4403 strncpy (str + MILLI_START, milli_names[(int)code], 4);
4404 output_asm_insn (str, 0);
4405 }
4406 }
4407
4408 /* The register constraints have put the operands and return value in
4409 the proper registers. */
4410
4411 const char *
4412 output_mul_insn (unsignedp, insn)
4413 int unsignedp ATTRIBUTE_UNUSED;
4414 rtx insn;
4415 {
4416 import_milli (mulI);
4417 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
4418 }
4419
4420 /* Emit the rtl for doing a division by a constant. */
4421
4422 /* Do magic division millicodes exist for this value? */
4423 static int magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
4424 1, 1};
4425
4426 /* We'll use an array to keep track of the magic millicodes and
4427 whether or not we've used them already. [n][0] is signed, [n][1] is
4428 unsigned. */
4429
4430 static int div_milli[16][2];
4431
4432 int
4433 div_operand (op, mode)
4434 rtx op;
4435 enum machine_mode mode;
4436 {
4437 return (mode == SImode
4438 && ((GET_CODE (op) == REG && REGNO (op) == 25)
4439 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
4440 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
4441 }
4442
4443 int
4444 emit_hpdiv_const (operands, unsignedp)
4445 rtx *operands;
4446 int unsignedp;
4447 {
4448 if (GET_CODE (operands[2]) == CONST_INT
4449 && INTVAL (operands[2]) > 0
4450 && INTVAL (operands[2]) < 16
4451 && magic_milli[INTVAL (operands[2])])
4452 {
4453 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
4454 emit
4455 (gen_rtx
4456 (PARALLEL, VOIDmode,
4457 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
4458 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4459 SImode,
4460 gen_rtx_REG (SImode, 26),
4461 operands[2])),
4462 gen_rtx_CLOBBER (VOIDmode, operands[4]),
4463 gen_rtx_CLOBBER (VOIDmode, operands[3]),
4464 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
4465 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
4466 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 31)))));
4467 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
4468 return 1;
4469 }
4470 return 0;
4471 }
4472
4473 const char *
4474 output_div_insn (operands, unsignedp, insn)
4475 rtx *operands;
4476 int unsignedp;
4477 rtx insn;
4478 {
4479 int divisor;
4480
4481 /* If the divisor is a constant, try to use one of the special
4482 opcodes. */
4483 if (GET_CODE (operands[0]) == CONST_INT)
4484 {
4485 static char buf[100];
4486 divisor = INTVAL (operands[0]);
4487 if (!div_milli[divisor][unsignedp])
4488 {
4489 div_milli[divisor][unsignedp] = 1;
4490 if (unsignedp)
4491 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
4492 else
4493 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
4494 }
4495 if (unsignedp)
4496 {
4497 sprintf (buf, "$$divU_%d", INTVAL (operands[0]));
4498 return output_millicode_call (insn,
4499 gen_rtx_SYMBOL_REF (SImode, buf));
4500 }
4501 else
4502 {
4503 sprintf (buf, "$$divI_%d", INTVAL (operands[0]));
4504 return output_millicode_call (insn,
4505 gen_rtx_SYMBOL_REF (SImode, buf));
4506 }
4507 }
4508 /* Divisor isn't a special constant. */
4509 else
4510 {
4511 if (unsignedp)
4512 {
4513 import_milli (divU);
4514 return output_millicode_call (insn,
4515 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
4516 }
4517 else
4518 {
4519 import_milli (divI);
4520 return output_millicode_call (insn,
4521 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
4522 }
4523 }
4524 }
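/* For example, a signed division by the magic constant 3 first emits
   ".IMPORT $$divI_3,MILLICODE" and then calls $$divI_3 ($$divU_3 when
   unsigned); per emit_hpdiv_const above, the dividend goes in %r26 and
   the quotient comes back in %r29.  */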
4525
4526 /* Output a $$rem millicode to do mod. */
4527
4528 const char *
4529 output_mod_insn (unsignedp, insn)
4530 int unsignedp;
4531 rtx insn;
4532 {
4533 if (unsignedp)
4534 {
4535 import_milli (remU);
4536 return output_millicode_call (insn,
4537 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
4538 }
4539 else
4540 {
4541 import_milli (remI);
4542 return output_millicode_call (insn,
4543 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
4544 }
4545 }
4546
4547 void
4548 output_arg_descriptor (call_insn)
4549 rtx call_insn;
4550 {
4551 const char *arg_regs[4];
4552 enum machine_mode arg_mode;
4553 rtx link;
4554 int i, output_flag = 0;
4555 int regno;
4556
4557 /* We neither need nor want argument location descriptors for the
4558 64-bit runtime environment or the ELF32 environment. */
4559 if (TARGET_64BIT || TARGET_ELF32)
4560 return;
4561
4562 for (i = 0; i < 4; i++)
4563 arg_regs[i] = 0;
4564
4565 /* Specify explicitly that no argument relocations should take place
4566 if using the portable runtime calling conventions. */
4567 if (TARGET_PORTABLE_RUNTIME)
4568 {
4569 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
4570 asm_out_file);
4571 return;
4572 }
4573
4574 if (GET_CODE (call_insn) != CALL_INSN)
4575 abort ();
4576 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
4577 {
4578 rtx use = XEXP (link, 0);
4579
4580 if (! (GET_CODE (use) == USE
4581 && GET_CODE (XEXP (use, 0)) == REG
4582 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
4583 continue;
4584
4585 arg_mode = GET_MODE (XEXP (use, 0));
4586 regno = REGNO (XEXP (use, 0));
4587 if (regno >= 23 && regno <= 26)
4588 {
4589 arg_regs[26 - regno] = "GR";
4590 if (arg_mode == DImode)
4591 arg_regs[25 - regno] = "GR";
4592 }
4593 else if (regno >= 32 && regno <= 39)
4594 {
4595 if (arg_mode == SFmode)
4596 arg_regs[(regno - 32) / 2] = "FR";
4597 else
4598 {
4599 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
4600 arg_regs[(regno - 34) / 2] = "FR";
4601 arg_regs[(regno - 34) / 2 + 1] = "FU";
4602 #else
4603 arg_regs[(regno - 34) / 2] = "FU";
4604 arg_regs[(regno - 34) / 2 + 1] = "FR";
4605 #endif
4606 }
4607 }
4608 }
4609 fputs ("\t.CALL ", asm_out_file);
4610 for (i = 0; i < 4; i++)
4611 {
4612 if (arg_regs[i])
4613 {
4614 if (output_flag++)
4615 fputc (',', asm_out_file);
4616 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
4617 }
4618 }
4619 fputc ('\n', asm_out_file);
4620 }
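/* For example, a call passing a single int in %r26 gets the descriptor
   "\t.CALL ARGW0=GR\n"; ARGW slots with no register argument are simply
   omitted from the directive.  */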
4621 \f
4622 /* Return the class of any secondary reload register that is needed to
4623 move IN into a register in class CLASS using mode MODE.
4624
4625 Profiling has shown this routine and its descendants account for
4626 a significant amount of compile time (~7%). So it has been
4627 optimized to reduce redundant computations and eliminate useless
4628 function calls.
4629
4630 It might be worthwhile to try to make this a leaf function too. */
4631
4632 enum reg_class
4633 secondary_reload_class (class, mode, in)
4634 enum reg_class class;
4635 enum machine_mode mode;
4636 rtx in;
4637 {
4638 int regno, is_symbolic;
4639
4640 /* Trying to load a constant into a FP register during PIC code
4641 generation will require %r1 as a scratch register. */
4642 if (flag_pic
4643 && GET_MODE_CLASS (mode) == MODE_INT
4644 && FP_REG_CLASS_P (class)
4645 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
4646 return R1_REGS;
4647
4648 /* Profiling showed the PA port spends about 1.3% of its compilation
4649 time in true_regnum from calls inside secondary_reload_class. */
4650
4651 if (GET_CODE (in) == REG)
4652 {
4653 regno = REGNO (in);
4654 if (regno >= FIRST_PSEUDO_REGISTER)
4655 regno = true_regnum (in);
4656 }
4657 else if (GET_CODE (in) == SUBREG)
4658 regno = true_regnum (in);
4659 else
4660 regno = -1;
4661
4662 /* If we have something like (mem (mem (...))), we can safely assume the
4663 inner MEM will end up in a general register after reloading, so there's
4664 no need for a secondary reload. */
4665 if (GET_CODE (in) == MEM
4666 && GET_CODE (XEXP (in, 0)) == MEM)
4667 return NO_REGS;
4668
4669 /* Handle out of range displacement for integer mode loads/stores of
4670 FP registers. */
4671 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
4672 && GET_MODE_CLASS (mode) == MODE_INT
4673 && FP_REG_CLASS_P (class))
4674 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
4675 return GENERAL_REGS;
4676
4677 if (GET_CODE (in) == HIGH)
4678 in = XEXP (in, 0);
4679
4680 /* Profiling has shown GCC spends about 2.6% of its compilation
4681 time in symbolic_operand from calls inside secondary_reload_class.
4682
4683 We use an inline copy and only compute its return value once to avoid
4684 useless work. */
4685 switch (GET_CODE (in))
4686 {
4687 rtx tmp;
4688
4689 case SYMBOL_REF:
4690 case LABEL_REF:
4691 is_symbolic = 1;
4692 break;
4693 case CONST:
4694 tmp = XEXP (in, 0);
4695 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
4696 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
4697 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
4698 break;
4699
4700 default:
4701 is_symbolic = 0;
4702 break;
4703 }
4704
4705 if (!flag_pic
4706 && is_symbolic
4707 && read_only_operand (in, VOIDmode))
4708 return NO_REGS;
4709
4710 if (class != R1_REGS && is_symbolic)
4711 return R1_REGS;
4712
4713 return NO_REGS;
4714 }
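/* E.g., with -fpic, reloading (const_int 42) into an FP register hits the
   first test above and returns R1_REGS; a symbolic address such as
   (symbol_ref "x") likewise needs %r1 unless it is a read-only operand
   being compiled without PIC.  */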
4715
4716 enum direction
4717 function_arg_padding (mode, type)
4718 enum machine_mode mode;
4719 tree type;
4720 {
4721 int size;
4722
4723 if (mode == BLKmode)
4724 {
4725 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
4726 size = int_size_in_bytes (type) * BITS_PER_UNIT;
4727 else
4728 return upward; /* Don't know if this is right, but it is
4729 the same as the old definition. */
4730 }
4731 else
4732 size = GET_MODE_BITSIZE (mode);
4733 if (size < PARM_BOUNDARY)
4734 return downward;
4735 else if (size % PARM_BOUNDARY)
4736 return upward;
4737 else
4738 return none;
4739 }
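/* Worked example (assuming a 32-bit target where PARM_BOUNDARY is 32):
   a 3 byte BLKmode struct is 24 bits < 32, so it pads downward; a
   5 byte struct is 40 bits, not a multiple of 32, so it pads upward.  */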
4740
4741 \f
4742 /* Do what is necessary for `va_start'. We look at the current function
4743 to determine if stdargs or varargs is used and fill in an initial
4744 va_list. A pointer to this constructor is returned. */
4745
4746 struct rtx_def *
4747 hppa_builtin_saveregs ()
4748 {
4749 rtx offset, dest;
4750 tree fntype = TREE_TYPE (current_function_decl);
4751 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
4752 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4753 != void_type_node)))
4754 ? UNITS_PER_WORD : 0);
4755
4756 if (argadj)
4757 offset = plus_constant (current_function_arg_offset_rtx, argadj);
4758 else
4759 offset = current_function_arg_offset_rtx;
4760
4761 if (TARGET_64BIT)
4762 {
4763 int i, off;
4764
4765 /* Adjust for varargs/stdarg differences. */
4766 if (argadj)
4767 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
4768 else
4769 offset = current_function_arg_offset_rtx;
4770
4771 /* We need to save %r26 .. %r19 inclusive starting at offset -64
4772 from the incoming arg pointer and growing to larger addresses. */
4773 for (i = 26, off = -64; i >= 19; i--, off += 8)
4774 emit_move_insn (gen_rtx_MEM (word_mode,
4775 plus_constant (arg_pointer_rtx, off)),
4776 gen_rtx_REG (word_mode, i));
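/* That is, %r26 is stored at ap-64, %r25 at ap-56, and so on through
   %r19 at ap-8.  */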
4777
4778 /* The incoming args pointer points just beyond the flushback area;
4779 normally this is not a serious concern. However, when we are doing
4780 varargs/stdargs we want to make the arg pointer point to the start
4781 of the incoming argument area. */
4782 emit_move_insn (virtual_incoming_args_rtx,
4783 plus_constant (arg_pointer_rtx, -64));
4784
4785 /* Now return a pointer to the first anonymous argument. */
4786 return copy_to_reg (expand_binop (Pmode, add_optab,
4787 virtual_incoming_args_rtx,
4788 offset, 0, 0, OPTAB_LIB_WIDEN));
4789 }
4790
4791 /* Store general registers on the stack. */
4792 dest = gen_rtx_MEM (BLKmode,
4793 plus_constant (current_function_internal_arg_pointer,
4794 -16));
4795 MEM_ALIAS_SET (dest) = get_varargs_alias_set ();
4796 move_block_from_reg (23, dest, 4, 4 * UNITS_PER_WORD);
4797
4798 /* move_block_from_reg will emit code to store the argument registers
4799 individually as scalar stores.
4800
4801 However, other insns may later load from the same addresses for
4802 a structure load (passing a struct to a varargs routine).
4803
4804 The alias code assumes that such aliasing can never happen, so we
4805 have to keep memory referencing insns from moving up beyond the
4806 last argument register store. So we emit a blockage insn here. */
4807 emit_insn (gen_blockage ());
4808
4809 if (current_function_check_memory_usage)
4810 emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
4811 dest, ptr_mode,
4812 GEN_INT (4 * UNITS_PER_WORD), TYPE_MODE (sizetype),
4813 GEN_INT (MEMORY_USE_RW),
4814 TYPE_MODE (integer_type_node));
4815
4816 return copy_to_reg (expand_binop (Pmode, add_optab,
4817 current_function_internal_arg_pointer,
4818 offset, 0, 0, OPTAB_LIB_WIDEN));
4819 }
4820
4821 void
4822 hppa_va_start (stdarg_p, valist, nextarg)
4823 int stdarg_p ATTRIBUTE_UNUSED;
4824 tree valist;
4825 rtx nextarg;
4826 {
4827 nextarg = expand_builtin_saveregs ();
4828 std_expand_builtin_va_start (1, valist, nextarg);
4829 }
4830
4831 rtx
4832 hppa_va_arg (valist, type)
4833 tree valist, type;
4834 {
4835 HOST_WIDE_INT align, size, ofs;
4836 tree t, ptr, pptr;
4837
4838 if (TARGET_64BIT)
4839 {
4840 /* Every argument in PA64 is passed by value (including large structs).
4841 Arguments with size greater than 8 must be aligned 0 MOD 16. */
4842
4843 size = int_size_in_bytes (type);
4844 if (size > UNITS_PER_WORD)
4845 {
4846 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
4847 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
4848 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
4849 build_int_2 (-2 * UNITS_PER_WORD, -1));
4850 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4851 TREE_SIDE_EFFECTS (t) = 1;
4852 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4853 }
4854 return std_expand_builtin_va_arg (valist, type);
4855 }
4856
4857 /* Compute the rounded size of the type. */
4858 align = PARM_BOUNDARY / BITS_PER_UNIT;
4859 size = int_size_in_bytes (type);
4860
4861 ptr = build_pointer_type (type);
4862
4863 /* "Large" types are passed by reference. */
4864 if (size > 8)
4865 {
4866 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
4867 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
4868 TREE_SIDE_EFFECTS (t) = 1;
4869
4870 pptr = build_pointer_type (ptr);
4871 t = build1 (NOP_EXPR, pptr, t);
4872 TREE_SIDE_EFFECTS (t) = 1;
4873
4874 t = build1 (INDIRECT_REF, ptr, t);
4875 TREE_SIDE_EFFECTS (t) = 1;
4876 }
4877 else
4878 {
4879 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
4880 build_int_2 (-size, -1));
4881
4882 /* Copied from va-pa.h, but we probably don't need to align
4883 to word size, since we generate and preserve that invariant. */
4884 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
4885 build_int_2 ((size > 4 ? -8 : -4), -1));
4886
4887 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4888 TREE_SIDE_EFFECTS (t) = 1;
4889
4890 ofs = (8 - size) % 4;
4891 if (ofs)
4892 {
4893 t = build (PLUS_EXPR, TREE_TYPE (valist), t, build_int_2 (ofs, 0));
4894 TREE_SIDE_EFFECTS (t) = 1;
4895 }
4896
4897 t = build1 (NOP_EXPR, ptr, t);
4898 TREE_SIDE_EFFECTS (t) = 1;
4899 }
4900
4901 /* Calculate! */
4902 return expand_expr (t, NULL_RTX, Pmode, EXPAND_NORMAL);
4903 }
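/* Worked example for the small-argument path above (addresses are
   hypothetical): for a 3 byte type with valist at 0x20, we compute
   0x20 - 3 = 0x1d, round down with & -4 to 0x1c, then add
   ofs = (8 - 3) % 4 = 1, giving 0x1d -- the start of the 3 bytes
   within their 4 byte slot.  */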
4904
4907 /* This routine handles all the normal conditional branch sequences we
4908 might need to generate. It handles compare immediate vs compare
4909 register, nullification of delay slots, varying length branches,
4910 negated branches, and all combinations of the above. It returns the
4911 output appropriate to emit the branch corresponding to all given
4912 parameters. */
4913
4914 const char *
4915 output_cbranch (operands, nullify, length, negated, insn)
4916 rtx *operands;
4917 int nullify, length, negated;
4918 rtx insn;
4919 {
4920 static char buf[100];
4921 int useskip = 0;
4922
4923 /* A conditional branch to the following instruction (e.g. the delay slot) is
4924 asking for a disaster. This can happen when not optimizing.
4925
4926 In such cases it is safe to emit nothing. */
4927
4928 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4929 return "";
4930
4931 /* If this is a long branch with its delay slot unfilled, set `nullify'
4932 as it can nullify the delay slot and save a nop. */
4933 if (length == 8 && dbr_sequence_length () == 0)
4934 nullify = 1;
4935
4936 /* If this is a short forward conditional branch which did not get
4937 its delay slot filled, the delay slot can still be nullified. */
4938 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4939 nullify = forward_branch_p (insn);
4940
4941 /* A forward branch over a single nullified insn can be done with a
4942 comclr instruction. This avoids a single cycle penalty due to a
4943 mis-predicted branch if we fall through (branch not taken). */
4944 if (length == 4
4945 && next_real_insn (insn) != 0
4946 && get_attr_length (next_real_insn (insn)) == 4
4947 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4948 && nullify)
4949 useskip = 1;
4950
4951 switch (length)
4952 {
4953 /* All short conditional branches except backwards with an unfilled
4954 delay slot. */
4955 case 4:
4956 if (useskip)
4957 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
4958 else
4959 strcpy (buf, "{com%I2b,|cmp%I2b,}");
4960 if (GET_MODE (operands[1]) == DImode)
4961 strcat (buf, "*");
4962 if (negated)
4963 strcat (buf, "%B3");
4964 else
4965 strcat (buf, "%S3");
4966 if (useskip)
4967 strcat (buf, " %2,%r1,%%r0");
4968 else if (nullify)
4969 strcat (buf, ",n %2,%r1,%0");
4970 else
4971 strcat (buf, " %2,%r1,%0");
4972 break;
4973
4974 /* All long conditionals. Note a short backward branch with an
4975 unfilled delay slot is treated just like a long backward branch
4976 with an unfilled delay slot. */
4977 case 8:
4978 /* Handle weird backwards branch with a filled delay slot
4979 which is nullified. */
4980 if (dbr_sequence_length () != 0
4981 && ! forward_branch_p (insn)
4982 && nullify)
4983 {
4984 strcpy (buf, "{com%I2b,|cmp%I2b,}");
4985 if (GET_MODE (operands[1]) == DImode)
4986 strcat (buf, "*");
4987 if (negated)
4988 strcat (buf, "%S3");
4989 else
4990 strcat (buf, "%B3");
4991 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
4992 }
4993 /* Handle short backwards branch with an unfilled delay slot.
4994 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
4995 taken and untaken branches. */
4996 else if (dbr_sequence_length () == 0
4997 && ! forward_branch_p (insn)
4998 && INSN_ADDRESSES_SET_P ()
4999 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5000 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5001 {
5002 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5003 if (GET_MODE (operands[1]) == DImode)
5004 strcat (buf, "*");
5005 if (negated)
5006 strcat (buf, "%B3 %2,%r1,%0%#");
5007 else
5008 strcat (buf, "%S3 %2,%r1,%0%#");
5009 }
5010 else
5011 {
5012 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5013 if (GET_MODE (operands[1]) == DImode)
5014 strcat (buf, "*");
5015 if (negated)
5016 strcat (buf, "%S3");
5017 else
5018 strcat (buf, "%B3");
5019 if (nullify)
5020 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
5021 else
5022 strcat (buf, " %2,%r1,%%r0\n\tb %0");
5023 }
5024 break;
5025
5026 case 20:
5027 /* Very long branch. Right now we only handle these when not
5028 optimizing. See "jump" pattern in pa.md for details. */
5029 if (optimize)
5030 abort ();
5031
5032 /* Create a reversed conditional branch which branches around
5033 the following insns. */
5034 if (negated)
5035 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+20|cmp%I2b,%S3,n %2,%r1,.+20}");
5036 else
5037 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+20|cmp%I2b,%B3,n %2,%r1,.+20}");
5038 if (GET_MODE (operands[1]) == DImode)
5039 {
5040 if (negated)
5041 strcpy (buf,
5042 "{com%I2b,*%S3,n %2,%r1,.+20|cmp%I2b,*%S3,n %2,%r1,.+20}");
5043 else
5044 strcpy (buf,
5045 "{com%I2b,*%B3,n %2,%r1,.+20|cmp%I2b,*%B3,n %2,%r1,.+20}");
5046 }
5047 output_asm_insn (buf, operands);
5048
5049 /* Output an insn to save %r1. */
5050 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
5051
5052 /* Now output a very long branch to the original target. */
5053 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands);
5054
5055 /* Now restore the value of %r1 in the delay slot. We're not
5056 optimizing so we know nothing else can be in the delay slot. */
5057 return "ldw -16(%%r30),%%r1";
5058
5059 case 28:
5060 /* Very long branch when generating PIC code. Right now we only
5061 handle these when not optimizing. See "jump" pattern in pa.md
5062 for details. */
5063 if (optimize)
5064 abort ();
5065
5066 /* Create a reversed conditional branch which branches around
5067 the following insns. */
5068 if (negated)
5069 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+28|cmp%I2b,%S3,n %2,%r1,.+28}");
5070 else
5071 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+28|cmp%I2b,%B3,n %2,%r1,.+28}");
5072 if (GET_MODE (operands[1]) == DImode)
5073 {
5074 if (negated)
5075 strcpy (buf, "{com%I2b,*%S3,n %2,%r1,.+28|cmp%I2b,*%S3,n %2,%r1,.+28}");
5076 else
5077 strcpy (buf, "{com%I2b,*%B3,n %2,%r1,.+28|cmp%I2b,*%B3,n %2,%r1,.+28}");
5078 }
5079 output_asm_insn (buf, operands);
5080
5081 /* Output an insn to save %r1. */
5082 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
5083
5084 /* Now output a very long PIC branch to the original target. */
5085 {
5086 rtx xoperands[5];
5087
5088 xoperands[0] = operands[0];
5089 xoperands[1] = operands[1];
5090 xoperands[2] = operands[2];
5091 xoperands[3] = operands[3];
5092 xoperands[4] = gen_label_rtx ();
5093
5094 output_asm_insn ("{bl|b,l} .+8,%%r1\n\taddil L'%l0-%l4,%%r1",
5095 xoperands);
5096 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5097 CODE_LABEL_NUMBER (xoperands[4]));
5098 output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1\n\tbv %%r0(%%r1)",
5099 xoperands);
5100 }
5101
5102 /* Now restore the value of %r1 in the delay slot. We're not
5103 optimizing so we know nothing else can be in the delay slot. */
5104 return "ldw -16(%%r30),%%r1";
5105
5106 default:
5107 abort ();
5108 }
5109 return buf;
5110 }
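/* For instance, a short (length 4) nullified branch which is not negated
   is assembled from the pieces above as
   "{com%I2b,|cmp%I2b,}%S3,n %2,%r1,%0", with the {old|new} assembler
   dialect resolved later by output_asm_insn.  */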
5111
5112 /* This routine handles all the branch-on-bit conditional branch sequences we
5113 might need to generate. It handles nullification of delay slots,
5114 varying length branches, negated branches and all combinations of the
5115 above. It returns the appropriate output template to emit the branch. */
5116
5117 const char *
5118 output_bb (operands, nullify, length, negated, insn, which)
5119 rtx *operands ATTRIBUTE_UNUSED;
5120 int nullify, length, negated;
5121 rtx insn;
5122 int which;
5123 {
5124 static char buf[100];
5125 int useskip = 0;
5126
5127 /* A conditional branch to the following instruction (e.g. the delay slot) is
5128 asking for a disaster. I do not think this can happen as this pattern
5129 is only used when optimizing; jump optimization should eliminate the
5130 jump. But be prepared just in case. */
5131
5132 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5133 return "";
5134
5135 /* If this is a long branch with its delay slot unfilled, set `nullify'
5136 as it can nullify the delay slot and save a nop. */
5137 if (length == 8 && dbr_sequence_length () == 0)
5138 nullify = 1;
5139
5140 /* If this is a short forward conditional branch which did not get
5141 its delay slot filled, the delay slot can still be nullified. */
5142 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5143 nullify = forward_branch_p (insn);
5144
5145 /* A forward branch over a single nullified insn can be done with an
5146 extrs instruction. This avoids a single cycle penalty due to a
5147 mis-predicted branch if we fall through (branch not taken). */
5148
5149 if (length == 4
5150 && next_real_insn (insn) != 0
5151 && get_attr_length (next_real_insn (insn)) == 4
5152 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5153 && nullify)
5154 useskip = 1;
5155
5156 switch (length)
5157 {
5158
5159 /* All short conditional branches except backwards with an unfilled
5160 delay slot. */
5161 case 4:
5162 if (useskip)
5163 strcpy (buf, "{extrs,|extrw,s,}");
5164 else
5165 strcpy (buf, "bb,");
5166 if (useskip && GET_MODE (operands[0]) == DImode)
5167 strcpy (buf, "extrd,s,*");
5168 else if (GET_MODE (operands[0]) == DImode)
5169 strcpy (buf, "bb,*");
5170 if ((which == 0 && negated)
5171 || (which == 1 && ! negated))
5172 strcat (buf, ">=");
5173 else
5174 strcat (buf, "<");
5175 if (useskip)
5176 strcat (buf, " %0,%1,1,%%r0");
5177 else if (nullify && negated)
5178 strcat (buf, ",n %0,%1,%3");
5179 else if (nullify && ! negated)
5180 strcat (buf, ",n %0,%1,%2");
5181 else if (! nullify && negated)
5182 strcat (buf, "%0,%1,%3");
5183 else if (! nullify && ! negated)
5184 strcat (buf, " %0,%1,%2");
5185 break;
5186
5187 /* All long conditionals. Note a short backward branch with an
5188 unfilled delay slot is treated just like a long backward branch
5189 with an unfilled delay slot. */
5190 case 8:
5191 /* Handle weird backwards branch with a filled delay slot
5192 which is nullified. */
5193 if (dbr_sequence_length () != 0
5194 && ! forward_branch_p (insn)
5195 && nullify)
5196 {
5197 strcpy (buf, "bb,");
5198 if (GET_MODE (operands[0]) == DImode)
5199 strcat (buf, "*");
5200 if ((which == 0 && negated)
5201 || (which == 1 && ! negated))
5202 strcat (buf, "<");
5203 else
5204 strcat (buf, ">=");
5205 if (negated)
5206 strcat (buf, ",n %0,%1,.+12\n\tb %3");
5207 else
5208 strcat (buf, ",n %0,%1,.+12\n\tb %2");
5209 }
5210 /* Handle short backwards branch with an unfilled delay slot.
5211 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5212 taken and untaken branches. */
5213 else if (dbr_sequence_length () == 0
5214 && ! forward_branch_p (insn)
5215 && INSN_ADDRESSES_SET_P ()
5216 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5217 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5218 {
5219 strcpy (buf, "bb,");
5220 if (GET_MODE (operands[0]) == DImode)
5221 strcat (buf, "*");
5222 if ((which == 0 && negated)
5223 || (which == 1 && ! negated))
5224 strcat (buf, ">=");
5225 else
5226 strcat (buf, "<");
5227 if (negated)
5228 strcat (buf, " %0,%1,%3%#");
5229 else
5230 strcat (buf, " %0,%1,%2%#");
5231 }
5232 else
5233 {
5234 strcpy (buf, "{extrs,|extrw,s,}");
5235 if (GET_MODE (operands[0]) == DImode)
5236 strcpy (buf, "extrd,s,*");
5237 if ((which == 0 && negated)
5238 || (which == 1 && ! negated))
5239 strcat (buf, "<");
5240 else
5241 strcat (buf, ">=");
5242 if (nullify && negated)
5243 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
5244 else if (nullify && ! negated)
5245 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
5246 else if (negated)
5247 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
5248 else
5249 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
5250 }
5251 break;
5252
5253 default:
5254 abort ();
5255 }
5256 return buf;
5257 }
5258
5259 /* This routine handles all the branch-on-variable-bit conditional branch
5260 sequences we might need to generate. It handles nullification of delay
5261 slots, varying length branches, negated branches and all combinations
5262 of the above. It returns the appropriate output template to emit the
5263 branch. */
5264
5265 const char *
5266 output_bvb (operands, nullify, length, negated, insn, which)
5267 rtx *operands ATTRIBUTE_UNUSED;
5268 int nullify, length, negated;
5269 rtx insn;
5270 int which;
5271 {
5272 static char buf[100];
5273 int useskip = 0;
5274
5275 /* A conditional branch to the following instruction (e.g. the delay slot) is
5276 asking for a disaster. I do not think this can happen as this pattern
5277 is only used when optimizing; jump optimization should eliminate the
5278 jump. But be prepared just in case. */
5279
5280 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5281 return "";
5282
5283 /* If this is a long branch with its delay slot unfilled, set `nullify'
5284 as it can nullify the delay slot and save a nop. */
5285 if (length == 8 && dbr_sequence_length () == 0)
5286 nullify = 1;
5287
5288 /* If this is a short forward conditional branch which did not get
5289 its delay slot filled, the delay slot can still be nullified. */
5290 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5291 nullify = forward_branch_p (insn);
5292
5293 /* A forward branch over a single nullified insn can be done with an
5294 extrs instruction. This avoids a single cycle penalty due to a
5295 mis-predicted branch if we fall through (branch not taken). */
5296
5297 if (length == 4
5298 && next_real_insn (insn) != 0
5299 && get_attr_length (next_real_insn (insn)) == 4
5300 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5301 && nullify)
5302 useskip = 1;
5303
5304 switch (length)
5305 {
5306
5307 /* All short conditional branches except backwards with an unfilled
5308 delay slot. */
5309 case 4:
5310 if (useskip)
5311 strcpy (buf, "{vextrs,|extrw,s,}");
5312 else
5313 strcpy (buf, "{bvb,|bb,}");
5314 if (useskip && GET_MODE (operands[0]) == DImode)
5315 strcpy (buf, "extrd,s,*");
5316 else if (GET_MODE (operands[0]) == DImode)
5317 strcpy (buf, "bb,*");
5318 if ((which == 0 && negated)
5319 || (which == 1 && ! negated))
5320 strcat (buf, ">=");
5321 else
5322 strcat (buf, "<");
5323 if (useskip)
5324 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
5325 else if (nullify && negated)
5326 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
5327 else if (nullify && ! negated)
5328 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
5329 else if (! nullify && negated)
5330 strcat (buf, "{%0,%3|%0,%%sar,%3}");
5331 else if (! nullify && ! negated)
5332 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
5333 break;
5334
5335 /* All long conditionals. Note a short backward branch with an
5336 unfilled delay slot is treated just like a long backward branch
5337 with an unfilled delay slot. */
5338 case 8:
5339 /* Handle weird backwards branch with a filled delay slot
5340 which is nullified. */
5341 if (dbr_sequence_length () != 0
5342 && ! forward_branch_p (insn)
5343 && nullify)
5344 {
5345 strcpy (buf, "{bvb,|bb,}");
5346 if (GET_MODE (operands[0]) == DImode)
5347 strcat (buf, "*");
5348 if ((which == 0 && negated)
5349 || (which == 1 && ! negated))
5350 strcat (buf, "<");
5351 else
5352 strcat (buf, ">=");
5353 if (negated)
5354 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
5355 else
5356 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
5357 }
5358 /* Handle short backwards branch with an unfilled delay slot.
5359 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5360 taken and untaken branches. */
5361 else if (dbr_sequence_length () == 0
5362 && ! forward_branch_p (insn)
5363 && INSN_ADDRESSES_SET_P ()
5364 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5365 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5366 {
5367 strcpy (buf, "{bvb,|bb,}");
5368 if (GET_MODE (operands[0]) == DImode)
5369 strcat (buf, "*");
5370 if ((which == 0 && negated)
5371 || (which == 1 && ! negated))
5372 strcat (buf, ">=");
5373 else
5374 strcat (buf, "<");
5375 if (negated)
5376 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
5377 else
5378 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
5379 }
5380 else
5381 {
5382 strcpy (buf, "{vextrs,|extrw,s,}");
5383 if (GET_MODE (operands[0]) == DImode)
5384 strcpy (buf, "extrd,s,*");
5385 if ((which == 0 && negated)
5386 || (which == 1 && ! negated))
5387 strcat (buf, "<");
5388 else
5389 strcat (buf, ">=");
5390 if (nullify && negated)
5391 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
5392 else if (nullify && ! negated)
5393 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
5394 else if (negated)
5395 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
5396 else
5397 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
5398 }
5399 break;
5400
5401 default:
5402 abort ();
5403 }
5404 return buf;
5405 }
5406
5407 /* Return the output template for emitting a dbra type insn.
5408
5409 Note it may perform some output operations on its own before
5410 returning the final output string. */
5411 const char *
5412 output_dbra (operands, insn, which_alternative)
5413 rtx *operands;
5414 rtx insn;
5415 int which_alternative;
5416 {
5418 /* A conditional branch to the following instruction (e.g. the delay slot) is
5419 asking for a disaster. Be prepared! */
5420
5421 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5422 {
5423 if (which_alternative == 0)
5424 return "ldo %1(%0),%0";
5425 else if (which_alternative == 1)
5426 {
5427 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
5428 output_asm_insn ("ldw -16(%%r30),%4", operands);
5429 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
5430 return "{fldws|fldw} -16(%%r30),%0";
5431 }
5432 else
5433 {
5434 output_asm_insn ("ldw %0,%4", operands);
5435 return "ldo %1(%4),%4\n\tstw %4,%0";
5436 }
5437 }
5438
5439 if (which_alternative == 0)
5440 {
5441 int nullify = INSN_ANNULLED_BRANCH_P (insn);
5442 int length = get_attr_length (insn);
5443
5444 /* If this is a long branch with its delay slot unfilled, set `nullify'
5445 as it can nullify the delay slot and save a nop. */
5446 if (length == 8 && dbr_sequence_length () == 0)
5447 nullify = 1;
5448
5449 /* If this is a short forward conditional branch which did not get
5450 its delay slot filled, the delay slot can still be nullified. */
5451 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5452 nullify = forward_branch_p (insn);
5453
5454 /* Handle short versions first. */
5455 if (length == 4 && nullify)
5456 return "addib,%C2,n %1,%0,%3";
5457 else if (length == 4 && ! nullify)
5458 return "addib,%C2 %1,%0,%3";
5459 else if (length == 8)
5460 {
5461 /* Handle weird backwards branch with a filled delay slot
5462 which is nullified. */
5463 if (dbr_sequence_length () != 0
5464 && ! forward_branch_p (insn)
5465 && nullify)
5466 return "addib,%N2,n %1,%0,.+12\n\tb %3";
5467 /* Handle short backwards branch with an unfilled delay slot.
5468 Using a addb;nop rather than addi;bl saves 1 cycle for both
5469 taken and untaken branches. */
5470 else if (dbr_sequence_length () == 0
5471 && ! forward_branch_p (insn)
5472 && INSN_ADDRESSES_SET_P ()
5473 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5474 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5475 return "addib,%C2 %1,%0,%3%#";
5476
5477 /* Handle normal cases. */
5478 if (nullify)
5479 return "addi,%N2 %1,%0,%0\n\tb,n %3";
5480 else
5481 return "addi,%N2 %1,%0,%0\n\tb %3";
5482 }
5483 else
5484 abort ();
5485 }
5486 /* Deal with gross reload from FP register case. */
5487 else if (which_alternative == 1)
5488 {
5489 /* Move loop counter from FP register to MEM then into a GR,
5490 increment the GR, store the GR into MEM, and finally reload
5491 the FP register from MEM from within the branch's delay slot. */
5492 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4", operands);
5493 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
5494 if (get_attr_length (insn) == 24)
5495 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
5496 else
5497 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
5498 }
5499 /* Deal with gross reload from memory case. */
5500 else
5501 {
5502 /* Reload loop counter from memory, the store back to memory
5503 happens in the branch's delay slot. */
5504 output_asm_insn ("ldw %0,%4", operands);
5505 if (get_attr_length (insn) == 12)
5506 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
5507 else
5508 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
5509 }
5510 }
5511
5512 /* Return the output template for emitting a movb type insn.
5513
5514 Note it may perform some output operations on its own before
5515 returning the final output string. */
5516 const char *
5517 output_movb (operands, insn, which_alternative, reverse_comparison)
5518 rtx *operands;
5519 rtx insn;
5520 int which_alternative;
5521 int reverse_comparison;
5522 {
5524 /* A conditional branch to the following instruction (e.g. the delay slot) is
5525 asking for a disaster. Be prepared! */
5526
5527 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5528 {
5529 if (which_alternative == 0)
5530 return "copy %1,%0";
5531 else if (which_alternative == 1)
5532 {
5533 output_asm_insn ("stw %1,-16(%%r30)", operands);
5534 return "{fldws|fldw} -16(%%r30),%0";
5535 }
5536 else if (which_alternative == 2)
5537 return "stw %1,%0";
5538 else
5539 return "mtsar %r1";
5540 }
5541
5542 /* Support the second variant. */
5543 if (reverse_comparison)
5544 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
5545
5546 if (which_alternative == 0)
5547 {
5548 int nullify = INSN_ANNULLED_BRANCH_P (insn);
5549 int length = get_attr_length (insn);
5550
5551 /* If this is a long branch with its delay slot unfilled, set `nullify'
5552 as it can nullify the delay slot and save a nop. */
5553 if (length == 8 && dbr_sequence_length () == 0)
5554 nullify = 1;
5555
5556 /* If this is a short forward conditional branch which did not get
5557 its delay slot filled, the delay slot can still be nullified. */
5558 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5559 nullify = forward_branch_p (insn);
5560
5561 /* Handle short versions first. */
5562 if (length == 4 && nullify)
5563 return "movb,%C2,n %1,%0,%3";
5564 else if (length == 4 && ! nullify)
5565 return "movb,%C2 %1,%0,%3";
5566 else if (length == 8)
5567 {
5568 /* Handle weird backwards branch with a filled delay slot
5569 which is nullified. */
5570 if (dbr_sequence_length () != 0
5571 && ! forward_branch_p (insn)
5572 && nullify)
5573 return "movb,%N2,n %1,%0,.+12\n\tb %3";
5574
5575 /* Handle short backwards branch with an unfilled delay slot.
5576 Using a movb;nop rather than or;bl saves 1 cycle for both
5577 taken and untaken branches. */
5578 else if (dbr_sequence_length () == 0
5579 && ! forward_branch_p (insn)
5580 && INSN_ADDRESSES_SET_P ()
5581 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5582 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5583 return "movb,%C2 %1,%0,%3%#";
5584 /* Handle normal cases. */
5585 if (nullify)
5586 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
5587 else
5588 return "or,%N2 %1,%%r0,%0\n\tb %3";
5589 }
5590 else
5591 abort ();
5592 }
5593 /* Deal with gross reload from FP register case. */
5594 else if (which_alternative == 1)
5595 {
5596 /* Move loop counter from FP register to MEM then into a GR,
5597 increment the GR, store the GR into MEM, and finally reload
5598 the FP register from MEM from within the branch's delay slot. */
5599 output_asm_insn ("stw %1,-16(%%r30)", operands);
5600 if (get_attr_length (insn) == 12)
5601 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
5602 else
5603 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
5604 }
5605 /* Deal with gross reload from memory case. */
5606 else if (which_alternative == 2)
5607 {
5608 /* Reload loop counter from memory, the store back to memory
5609 happens in the branch's delay slot. */
5610 if (get_attr_length (insn) == 8)
5611 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
5612 else
5613 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
5614 }
5615 /* Handle SAR as a destination. */
5616 else
5617 {
5618 if (get_attr_length (insn) == 8)
5619 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
5620 else
5621 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
5622 }
5623 }
5624
5625
5626 /* INSN is a millicode call. It may have an unconditional jump in its delay
5627 slot.
5628
5629 CALL_DEST is the routine we are calling. */
5630
5631 const char *
5632 output_millicode_call (insn, call_dest)
5633 rtx insn;
5634 rtx call_dest;
5635 {
5636 int distance;
5637 rtx xoperands[4];
5638 rtx seq_insn;
5639
5640 xoperands[3] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
5641
5642 /* Handle common case -- empty delay slot or no jump in the delay slot,
5643 and we're sure that the branch will reach the beginning of the $CODE$
5644 subspace. */
5645 if ((dbr_sequence_length () == 0
5646 && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
5647 || (dbr_sequence_length () != 0
5648 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
5649 && get_attr_length (insn) == 4))
5650 {
5651 xoperands[0] = call_dest;
5652 output_asm_insn ("{bl|b,l} %0,%3%#", xoperands);
5653 return "";
5654 }
5655
5656 /* This call may not reach the beginning of the $CODE$ subspace. */
5657 if (get_attr_length (insn) > 4)
5658 {
5659 int delay_insn_deleted = 0;
5660
5661 /* We need to emit an inline long-call branch. */
5662 if (dbr_sequence_length () != 0
5663 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
5664 {
5665 /* A non-jump insn in the delay slot. By definition we can
5666 emit this insn before the call. */
5667 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
5668
5669 /* Now delete the delay insn. */
5670 PUT_CODE (NEXT_INSN (insn), NOTE);
5671 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5672 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5673 delay_insn_deleted = 1;
5674 }
5675
5676 /* PIC long millicode call sequence. */
5677 if (flag_pic)
5678 {
5679 xoperands[0] = call_dest;
5680 xoperands[1] = gen_label_rtx ();
5681 /* Get our address + 8 into %r1. */
5682 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
5683
5684 /* Add %r1 to the offset of our target from the next insn. */
5685 output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
5686 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5687 CODE_LABEL_NUMBER (xoperands[1]));
5688 output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
5689
5690 /* Get the return address into %r31. */
5691 output_asm_insn ("blr 0,%3", xoperands);
5692
5693 /* Branch to our target which is in %r1. */
5694 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
5695
5696 /* Empty delay slot. Note this insn gets fetched twice and
5697 executed once. To be safe we use a nop. */
5698 output_asm_insn ("nop", xoperands);
5699 }
5700 /* Pure portable runtime doesn't allow be/ble; we also don't have
5701 PIC support in the assembler/linker, so this sequence is needed. */
5702 else if (TARGET_PORTABLE_RUNTIME)
5703 {
5704 xoperands[0] = call_dest;
5705 /* Get the address of our target into %r29. */
5706 output_asm_insn ("ldil L%%%0,%%r29", xoperands);
5707 output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
5708
5709 /* Get our return address into %r31. */
5710 output_asm_insn ("blr %%r0,%3", xoperands);
5711
5712 /* Jump to our target address in %r29. */
5713 output_asm_insn ("bv,n %%r0(%%r29)", xoperands);
5714
5715 /* Empty delay slot. Note this insn gets fetched twice and
5716 executed once. To be safe we use a nop. */
5717 output_asm_insn ("nop", xoperands);
5718 }
5719 /* If we're allowed to use be/ble instructions, then this is the
5720 best sequence to use for a long millicode call. */
5721 else
5722 {
5723 xoperands[0] = call_dest;
5724 output_asm_insn ("ldil L%%%0,%3", xoperands);
5725 output_asm_insn ("{ble|be,l} R%%%0(%%sr4,%3)", xoperands);
5726 output_asm_insn ("nop", xoperands);
5727 }
5728
5729 /* If we had a jump in the call's delay slot, output it now. */
5730 if (dbr_sequence_length () != 0
5731 && !delay_insn_deleted)
5732 {
5733 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5734 output_asm_insn ("b,n %0", xoperands);
5735
5736 /* Now delete the delay insn. */
5737 PUT_CODE (NEXT_INSN (insn), NOTE);
5738 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5739 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5740 }
5741 return "";
5742 }
5743
5744 /* This call has an unconditional jump in its delay slot and the
5745 call is known to reach its target or the beginning of the current
5746 subspace. */
5747
5748 /* Use the containing sequence insn's address. */
5749 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
5750
5751 distance = INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
5752 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8;
5753
5754 /* If the branch was too far away, emit a normal call followed
5755 by a nop, followed by the unconditional branch.
5756
5757 If the branch is close, then adjust %r2 from within the
5758 call's delay slot. */
5759
5760 xoperands[0] = call_dest;
5761 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5762 if (! VAL_14_BITS_P (distance))
5763 output_asm_insn ("{bl|b,l} %0,%3\n\tnop\n\tb,n %1", xoperands);
5764 else
5765 {
5766 xoperands[2] = gen_label_rtx ();
5767 output_asm_insn ("\n\t{bl|b,l} %0,%3\n\tldo %1-%2(%3),%3",
5768 xoperands);
5769 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5770 CODE_LABEL_NUMBER (xoperands[2]));
5771 }
5772
5773 /* Delete the jump. */
5774 PUT_CODE (NEXT_INSN (insn), NOTE);
5775 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5776 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5777 return "";
5778 }
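
/* An illustrative note on the "- 8" above: PA-RISC PC-relative
   branches take their displacement relative to the address of the
   branch plus 8, so the raw difference between target and branch
   must be adjusted before the VAL_14_BITS_P range test.  A trivial
   sketch, not used by the port:  */

static long
pa_branch_disp_sketch (target_addr, branch_addr)
     long target_addr, branch_addr;
{
  /* Displacement as the hardware sees it: target - (branch + 8).  */
  return target_addr - branch_addr - 8;
}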
5779
5780 extern struct obstack permanent_obstack;
5781
5782 /* INSN is a function call. It may have an unconditional jump
5783 in its delay slot.
5784
5785 CALL_DEST is the routine we are calling. */
5786
5787 const char *
5788 output_call (insn, call_dest, sibcall)
5789 rtx insn;
5790 rtx call_dest;
5791 int sibcall;
5792 {
5793 int distance;
5794 rtx xoperands[4];
5795 rtx seq_insn;
5796
5797 /* Handle common case -- empty delay slot or no jump in the delay slot,
5798 and we're sure that the branch will reach the beginning of the $CODE$
5799 subspace. */
5800 if ((dbr_sequence_length () == 0
5801 && get_attr_length (insn) == 8)
5802 || (dbr_sequence_length () != 0
5803 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
5804 && get_attr_length (insn) == 4))
5805 {
5806 xoperands[0] = call_dest;
5807 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
5808 output_asm_insn ("{bl|b,l} %0,%1%#", xoperands);
5809 return "";
5810 }
5811
5812 /* This call may not reach the beginning of the $CODE$ subspace. */
5813 if (get_attr_length (insn) > 8)
5814 {
5815 int delay_insn_deleted = 0;
5816 rtx xoperands[2];
5817 rtx link;
5818
5819 /* We need to emit an inline long-call branch. Furthermore,
5820 because we're changing a named function call into an indirect
5821 function call well after the parameters have been set up, we
5822 need to make sure any FP args appear in both the integer
5823 and FP registers. Also, we need move any delay slot insn
5824 out of the delay slot. And finally, we can't rely on the linker
5825 being able to fix the call to $$dyncall! -- Yuk! */
5826 if (dbr_sequence_length () != 0
5827 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
5828 {
5829 /* A non-jump insn in the delay slot. By definition we can
5830 emit this insn before the call (and in fact before argument
5831 relocation). */
5832 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
5833
5834 /* Now delete the delay insn. */
5835 PUT_CODE (NEXT_INSN (insn), NOTE);
5836 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5837 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5838 delay_insn_deleted = 1;
5839 }
5840
5841 /* Now copy any FP arguments into integer registers. */
5842 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
5843 {
5844 int arg_mode, regno;
5845 rtx use = XEXP (link, 0);
5846 if (! (GET_CODE (use) == USE
5847 && GET_CODE (XEXP (use, 0)) == REG
5848 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5849 continue;
5850
5851 arg_mode = GET_MODE (XEXP (use, 0));
5852 regno = REGNO (XEXP (use, 0));
5853 /* Is it a floating point register? */
5854 if (regno >= 32 && regno <= 39)
5855 {
5856 /* Copy from the FP register into an integer register
5857 (via memory). */
5858 if (arg_mode == SFmode)
5859 {
5860 xoperands[0] = XEXP (use, 0);
5861 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
5862 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)",
5863 xoperands);
5864 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5865 }
5866 else
5867 {
5868 xoperands[0] = XEXP (use, 0);
5869 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
5870 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)",
5871 xoperands);
5872 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
5873 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5874 }
5875 }
5876 }
5877
5878 /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
5879 we don't have any direct calls in that case. */
5880 {
5881 int i;
5882 const char *name = XSTR (call_dest, 0);
5883
5884 /* See if we have already put this function on the list
5885 of deferred plabels. This list is generally small,
5886 so a linear search is not too ugly. If it proves too
5887 slow, replace it with something faster. */
5888 for (i = 0; i < n_deferred_plabels; i++)
5889 if (strcmp (name, deferred_plabels[i].name) == 0)
5890 break;
5891
5892 /* If the deferred plabel list is empty, or this entry was
5893 not found on the list, create a new entry on the list. */
5894 if (deferred_plabels == NULL || i == n_deferred_plabels)
5895 {
5896 const char *real_name;
5897
5898 if (deferred_plabels == 0)
5899 deferred_plabels = (struct deferred_plabel *)
5900 xmalloc (1 * sizeof (struct deferred_plabel));
5901 else
5902 deferred_plabels = (struct deferred_plabel *)
5903 xrealloc (deferred_plabels,
5904 ((n_deferred_plabels + 1)
5905 * sizeof (struct deferred_plabel)));
5906
5907 i = n_deferred_plabels++;
5908 deferred_plabels[i].internal_label = gen_label_rtx ();
5909 deferred_plabels[i].name = obstack_alloc (&permanent_obstack,
5910 strlen (name) + 1);
5911 strcpy (deferred_plabels[i].name, name);
5912
5913 /* Gross. We have just implicitly taken the address of this
5914 function; mark it as such. */
5915 STRIP_NAME_ENCODING (real_name, name);
5916 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
5917 }
5918
5919 /* We have to load the address of the function using a procedure
5920 label (plabel). Inline plabels can lose for PIC and other
5921 cases, so avoid them by creating a 32bit plabel in the data
5922 segment. */
5923 if (flag_pic)
5924 {
5925 xoperands[0] = deferred_plabels[i].internal_label;
5926 xoperands[1] = gen_label_rtx ();
5927
5928 output_asm_insn ("addil LT%%%0,%%r19", xoperands);
5929 output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
5930 output_asm_insn ("ldw 0(%%r22),%%r22", xoperands);
5931
5932 /* Get our address + 8 into %r1. */
5933 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
5934
5935 /* Add %r1 to the offset of dyncall from the next insn. */
5936 output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
5937 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5938 CODE_LABEL_NUMBER (xoperands[1]));
5939 output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);
5940
5941 /* Get the return address into %r31. */
5942 output_asm_insn ("blr %%r0,%%r31", xoperands);
5943
5944 /* Branch to our target which is in %r1. */
5945 output_asm_insn ("bv %%r0(%%r1)", xoperands);
5946
5947 if (sibcall)
5948 {
5949 /* This call never returns, so we do not need to fix the
5950 return pointer. */
5951 output_asm_insn ("nop", xoperands);
5952 }
5953 else
5954 {
5955 /* Copy the return address into %r2 also. */
5956 output_asm_insn ("copy %%r31,%%r2", xoperands);
5957 }
5958 }
5959 else
5960 {
5961 xoperands[0] = deferred_plabels[i].internal_label;
5962
5963 /* Get the address of our target into %r22. */
5964 output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands);
5965 output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands);
5966
5967 /* Get the high part of the address of $dyncall into %r2, then
5968 add in the low part in the branch instruction. */
5969 output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
5970 output_asm_insn ("{ble|be,l} R%%$$dyncall(%%sr4,%%r2)",
5971 xoperands);
5972
5973 if (sibcall)
5974 {
5975 /* This call never returns, so we do not need to fix the
5976 return pointer. */
5977 output_asm_insn ("nop", xoperands);
5978 }
5979 else
5980 {
5981 /* Copy the return address into %r2 also. */
5982 output_asm_insn ("copy %%r31,%%r2", xoperands);
5983 }
5984 }
5985 }
5986
5987 /* If we had a jump in the call's delay slot, output it now. */
5988 if (dbr_sequence_length () != 0
5989 && !delay_insn_deleted)
5990 {
5991 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5992 output_asm_insn ("b,n %0", xoperands);
5993
5994 /* Now delete the delay insn. */
5995 PUT_CODE (NEXT_INSN (insn), NOTE);
5996 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5997 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5998 }
5999 return "";
6000 }
6001
6002 /* This call has an unconditional jump in its delay slot and the
6003 call is known to reach its target or the beginning of the current
6004 subspace. */
6005
6006 /* Use the containing sequence insn's address. */
6007 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6008
6009 distance = INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6010 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8;
6011
6012 /* If the branch was too far away, emit a normal call followed
6013 by a nop, followed by the unconditional branch.
6014
6015 If the branch is close, then adjust %r2 from within the
6016 call's delay slot. */
6017
6018 xoperands[0] = call_dest;
6019 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6020 if (! VAL_14_BITS_P (distance))
6021 output_asm_insn ("{bl|b,l} %0,%%r2\n\tnop\n\tb,n %1", xoperands);
6022 else
6023 {
6024 xoperands[3] = gen_label_rtx ();
6025 output_asm_insn ("\n\t{bl|b,l} %0,%%r2\n\tldo %1-%3(%%r2),%%r2",
6026 xoperands);
6027 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6028 CODE_LABEL_NUMBER (xoperands[3]));
6029 }
6030
6031 /* Delete the jump. */
6032 PUT_CODE (NEXT_INSN (insn), NOTE);
6033 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6034 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6035 return "";
6036 }
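
/* The fstw/ldw and fstd/ldw pairs emitted above move FP argument
   bits into general registers by bouncing them through the stack
   slot at -16(%r30).  A rough C model of the SFmode case, shown
   only to illustrate the bit-for-bit transfer; it assumes 4-byte
   float and int and is not part of the port itself:  */

static unsigned int
sf_bits_via_memory_sketch (f)
     float f;
{
  /* Store the FP value, then reload the same four bytes as an
     integer -- exactly what the fstw/ldw pair does.  */
  union { float f; unsigned int w; } slot;
  slot.f = f;
  return slot.w;
}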
6037
6038 /* In HPUX 8.0's shared library scheme, special relocations are needed
6039 for function labels if they might be passed to a function
6040 in a shared library (because shared libraries don't live in code
6041 space), and special magic is needed to construct their address. */
6042
6043 void
6044 hppa_encode_label (sym)
6045 rtx sym;
6046 {
6047 const char *str = XSTR (sym, 0);
6048 int len = strlen (str) + 1;
6049 char *newstr, *p;
6050
6051 p = newstr = alloca (len + 1);
6052 if (str[0] == '*')
6053 {
6054 str++;
6055 len--;
6056 }
6057 *p++ = '@';
6058 strcpy (p, str);
6059
6060 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
6061 }
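
/* hppa_encode_label marks function symbols with a leading '@';
   the rest of the port (e.g. function_label_operand below) tests
   for that marker via FUNCTION_NAME_P.  A one-line sketch of the
   test, assuming the pa.h definition simply inspects the first
   character:  */

static int
function_name_p_sketch (name)
     const char *name;
{
  /* "@foo" is an encoded function label; "foo" is an ordinary
     symbol.  */
  return name[0] == '@';
}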
6062
6063 int
6064 function_label_operand (op, mode)
6065 rtx op;
6066 enum machine_mode mode ATTRIBUTE_UNUSED;
6067 {
6068 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
6069 }
6070
6071 /* Returns 1 if OP is a function label involved in a simple addition
6072 with a constant. Used to keep certain patterns from matching
6073 during instruction combination. */
6074 int
6075 is_function_label_plus_const (op)
6076 rtx op;
6077 {
6078 /* Strip off any CONST. */
6079 if (GET_CODE (op) == CONST)
6080 op = XEXP (op, 0);
6081
6082 return (GET_CODE (op) == PLUS
6083 && function_label_operand (XEXP (op, 0), Pmode)
6084 && GET_CODE (XEXP (op, 1)) == CONST_INT);
6085 }
6086
6087 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6088 use in fmpyadd instructions. */
6089 int
6090 fmpyaddoperands (operands)
6091 rtx *operands;
6092 {
6093 enum machine_mode mode = GET_MODE (operands[0]);
6094
6095 /* Must be a floating point mode. */
6096 if (mode != SFmode && mode != DFmode)
6097 return 0;
6098
6099 /* All modes must be the same. */
6100 if (! (mode == GET_MODE (operands[1])
6101 && mode == GET_MODE (operands[2])
6102 && mode == GET_MODE (operands[3])
6103 && mode == GET_MODE (operands[4])
6104 && mode == GET_MODE (operands[5])))
6105 return 0;
6106
6107 /* All operands must be registers. */
6108 if (! (GET_CODE (operands[1]) == REG
6109 && GET_CODE (operands[2]) == REG
6110 && GET_CODE (operands[3]) == REG
6111 && GET_CODE (operands[4]) == REG
6112 && GET_CODE (operands[5]) == REG))
6113 return 0;
6114
6115 /* Only 2 real operands to the addition. One of the input operands must
6116 be the same as the output operand. */
6117 if (! rtx_equal_p (operands[3], operands[4])
6118 && ! rtx_equal_p (operands[3], operands[5]))
6119 return 0;
6120
6121 /* The inout operand of the add cannot conflict with any operand of the multiply. */
6122 if (rtx_equal_p (operands[3], operands[0])
6123 || rtx_equal_p (operands[3], operands[1])
6124 || rtx_equal_p (operands[3], operands[2]))
6125 return 0;
6126
6127 /* The multiply result cannot feed the addition's input operands. */
6128 if (rtx_equal_p (operands[4], operands[0])
6129 || rtx_equal_p (operands[5], operands[0]))
6130 return 0;
6131
6132 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6133 if (mode == SFmode
6134 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6135 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6136 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6137 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6138 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6139 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6140 return 0;
6141
6142 /* Passed. Operands are suitable for fmpyadd. */
6143 return 1;
6144 }
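
/* A compact model of the independence rules enforced above, with
   registers represented as small integers (illustrative only).
   For fmpyadd we pair c = a * b with the add d = d + e: the add's
   inout operand must not overlap any multiply operand, and the
   multiply result must not feed either add input.  */

static int
fmpyadd_independent_sketch (c, a, b, d, e)
     int c, a, b, d, e;
{
  /* D is the add's inout operand; E is its other input.  */
  return d != c && d != a && d != b && e != c;
}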
6145
6146 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6147 use in fmpysub instructions. */
6148 int
6149 fmpysuboperands (operands)
6150 rtx *operands;
6151 {
6152 enum machine_mode mode = GET_MODE (operands[0]);
6153
6154 /* Must be a floating point mode. */
6155 if (mode != SFmode && mode != DFmode)
6156 return 0;
6157
6158 /* All modes must be the same. */
6159 if (! (mode == GET_MODE (operands[1])
6160 && mode == GET_MODE (operands[2])
6161 && mode == GET_MODE (operands[3])
6162 && mode == GET_MODE (operands[4])
6163 && mode == GET_MODE (operands[5])))
6164 return 0;
6165
6166 /* All operands must be registers. */
6167 if (! (GET_CODE (operands[1]) == REG
6168 && GET_CODE (operands[2]) == REG
6169 && GET_CODE (operands[3]) == REG
6170 && GET_CODE (operands[4]) == REG
6171 && GET_CODE (operands[5]) == REG))
6172 return 0;
6173
6174 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
6175 operation, so operands[4] must be the same as operands[3].
6176 if (! rtx_equal_p (operands[3], operands[4]))
6177 return 0;
6178
6179 /* The multiply result cannot feed the subtraction. */
6180 if (rtx_equal_p (operands[5], operands[0]))
6181 return 0;
6182
6183 /* The inout operand of the sub cannot conflict with any operand of the multiply. */
6184 if (rtx_equal_p (operands[3], operands[0])
6185 || rtx_equal_p (operands[3], operands[1])
6186 || rtx_equal_p (operands[3], operands[2]))
6187 return 0;
6188
6189 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6190 if (mode == SFmode
6191 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6192 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6193 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6194 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6195 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6196 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6197 return 0;
6198
6199 /* Passed. Operands are suitable for fmpysub. */
6200 return 1;
6201 }
6202
6203 int
6204 plus_xor_ior_operator (op, mode)
6205 rtx op;
6206 enum machine_mode mode ATTRIBUTE_UNUSED;
6207 {
6208 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
6209 || GET_CODE (op) == IOR);
6210 }
6211
6212 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
6213 constants for shadd instructions. */
6214 static int
6215 shadd_constant_p (val)
6216 int val;
6217 {
6218 if (val == 2 || val == 4 || val == 8)
6219 return 1;
6220 else
6221 return 0;
6222 }
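
/* The constants 2, 4 and 8 correspond to the sh1add, sh2add and
   sh3add instructions, which compute (x << N) + y in a single
   instruction.  A sketch of the multiplier-4 case (illustrative;
   the real work is done by the pattern selected in pa.md):  */

static unsigned int
sh2add_sketch (x, y)
     unsigned int x, y;
{
  /* sh2add x,y,t scales X by 4 and adds Y, which is why 4 is a
     valid shadd constant.  */
  return (x << 2) + y;
}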
6223
6224 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
6225 the valid constants for shadd instructions. */
6226 int
6227 shadd_operand (op, mode)
6228 rtx op;
6229 enum machine_mode mode ATTRIBUTE_UNUSED;
6230 {
6231 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
6232 }
6233
6234 /* Return 1 if OP is valid as a base register in a reg + reg address. */
6235
6236 int
6237 basereg_operand (op, mode)
6238 rtx op;
6239 enum machine_mode mode;
6240 {
6241 /* cse will create some unscaled indexed addresses; however, it
6242 generally isn't a win on the PA, so avoid creating unscaled
6243 indexed addresses until after cse is finished. */
6244 if (!cse_not_expected)
6245 return 0;
6246
6247 /* Allow any register when TARGET_NO_SPACE_REGS is in effect since
6248 we don't have to worry about the braindamaged implicit space
6249 register selection from the basereg. */
6250 if (TARGET_NO_SPACE_REGS)
6251 return (GET_CODE (op) == REG);
6252
6253 /* While it's always safe to index off the frame pointer, it's not
6254 always profitable, particularly when the frame pointer is being
6255 eliminated. */
6256 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
6257 return 1;
6258
6259 return (GET_CODE (op) == REG
6260 && REG_POINTER (op)
6261 && register_operand (op, mode));
6262 }
6263
6264 /* Return 1 if this operand is anything other than a hard register. */
6265
6266 int
6267 non_hard_reg_operand (op, mode)
6268 rtx op;
6269 enum machine_mode mode ATTRIBUTE_UNUSED;
6270 {
6271 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
6272 }
6273
6274 /* Return 1 if INSN branches forward. We should be using INSN_ADDRESSES
6275 to avoid walking through all the insns... */
6276 static int
6277 forward_branch_p (insn)
6278 rtx insn;
6279 {
6280 rtx label = JUMP_LABEL (insn);
6281
6282 while (insn)
6283 {
6284 if (insn == label)
6285 break;
6286 else
6287 insn = NEXT_INSN (insn);
6288 }
6289
6290 return (insn == label);
6291 }
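
/* A sketch of the INSN_ADDRESSES-based test the comment above asks
   for, guarded the same way as the branch-length code earlier in
   this file.  Illustrative only -- the walking version above is
   what the port actually uses, and returning 0 when addresses have
   not been computed is a conservative assumption, not the port's
   policy.  */

static int
forward_branch_by_address_sketch (insn)
     rtx insn;
{
  if (! INSN_ADDRESSES_SET_P ())
    return 0;
  return (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
          > INSN_ADDRESSES (INSN_UID (insn)));
}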
6292
6293 /* Return 1 if OP is an equality comparison, else return 0. */
6294 int
6295 eq_neq_comparison_operator (op, mode)
6296 rtx op;
6297 enum machine_mode mode ATTRIBUTE_UNUSED;
6298 {
6299 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
6300 }
6301
6302 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
6303 int
6304 movb_comparison_operator (op, mode)
6305 rtx op;
6306 enum machine_mode mode ATTRIBUTE_UNUSED;
6307 {
6308 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
6309 || GET_CODE (op) == LT || GET_CODE (op) == GE);
6310 }
6311
6312 /* Return 1 if INSN is in the delay slot of a call instruction. */
6313 int
6314 jump_in_call_delay (insn)
6315 rtx insn;
6316 {
6317
6318 if (GET_CODE (insn) != JUMP_INSN)
6319 return 0;
6320
6321 if (PREV_INSN (insn)
6322 && PREV_INSN (PREV_INSN (insn))
6323 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
6324 {
6325 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
6326
6327 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
6328 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
6329
6330 }
6331 else
6332 return 0;
6333 }
6334
6335 /* Output an unconditional move and branch insn. */
6336
6337 const char *
6338 output_parallel_movb (operands, length)
6339 rtx *operands;
6340 int length;
6341 {
6342 /* These are the cases in which we win. */
6343 if (length == 4)
6344 return "mov%I1b,tr %1,%0,%2";
6345
6346 /* None of these cases wins, but they don't lose either. */
6347 if (dbr_sequence_length () == 0)
6348 {
6349 /* Nothing in the delay slot, fake it by putting the combined
6350 insn (the copy or add) in the delay slot of a bl. */
6351 if (GET_CODE (operands[1]) == CONST_INT)
6352 return "b %2\n\tldi %1,%0";
6353 else
6354 return "b %2\n\tcopy %1,%0";
6355 }
6356 else
6357 {
6358 /* Something in the delay slot, but we've got a long branch. */
6359 if (GET_CODE (operands[1]) == CONST_INT)
6360 return "ldi %1,%0\n\tb %2";
6361 else
6362 return "copy %1,%0\n\tb %2";
6363 }
6364 }
6365
6366 /* Output an unconditional add and branch insn. */
6367
6368 const char *
6369 output_parallel_addb (operands, length)
6370 rtx *operands;
6371 int length;
6372 {
6373 /* To make life easy we want operand0 to be the shared input/output
6374 operand and operand1 to be the readonly operand. */
6375 if (operands[0] == operands[1])
6376 operands[1] = operands[2];
6377
6378 /* These are the cases in which we win. */
6379 if (length == 4)
6380 return "add%I1b,tr %1,%0,%3";
6381
6382 /* None of these cases wins, but they don't lose either. */
6383 if (dbr_sequence_length () == 0)
6384 {
6385 /* Nothing in the delay slot, fake it by putting the combined
6386 insn (the copy or add) in the delay slot of a bl. */
6387 return "b %3\n\tadd%I1 %1,%0,%0";
6388 }
6389 else
6390 {
6391 /* Something in the delay slot, but we've got a long branch. */
6392 return "add%I1 %1,%0,%0\n\tb %3";
6393 }
6394 }
6395
6396 /* Return nonzero if INSN (a jump insn) immediately follows a call to
6397 a named function. This is used to discourage creating parallel movb/addb
6398 insns since a jump which immediately follows a call can execute in the
6399 delay slot of the call.
6400
6401 It is also used to avoid filling the delay slot of a jump which
6402 immediately follows a call since the jump can usually be eliminated
6403 completely by modifying RP in the delay slot of the call. */
6404
6405 int
6406 following_call (insn)
6407 rtx insn;
6408 {
6409 /* We do not parallel movb,addb or place jumps into call delay slots when
6410 optimizing for the PA8000. */
6411 if (pa_cpu != PROCESSOR_8000)
6412 return 0;
6413
6414 /* Find the previous real insn, skipping NOTEs. */
6415 insn = PREV_INSN (insn);
6416 while (insn && GET_CODE (insn) == NOTE)
6417 insn = PREV_INSN (insn);
6418
6419 /* Check for CALL_INSNs and millicode calls. */
6420 if (insn
6421 && ((GET_CODE (insn) == CALL_INSN
6422 && get_attr_type (insn) != TYPE_DYNCALL)
6423 || (GET_CODE (insn) == INSN
6424 && GET_CODE (PATTERN (insn)) != SEQUENCE
6425 && GET_CODE (PATTERN (insn)) != USE
6426 && GET_CODE (PATTERN (insn)) != CLOBBER
6427 && get_attr_type (insn) == TYPE_MILLI)))
6428 return 1;
6429
6430 return 0;
6431 }
6432
6433 /* We use this hook to perform a PA specific optimization which is difficult
6434 to do in earlier passes.
6435
6436 We want the delay slots of branches within jump tables to be filled.
6437 None of the compiler passes at the moment even has the notion that a
6438 PA jump table doesn't contain addresses, but instead contains actual
6439 instructions!
6440
6441 Because we actually jump into the table, the addresses of each entry
6442 must stay constant in relation to the beginning of the table (which
6443 itself must stay constant relative to the instruction to jump into
6444 it). I don't believe we can guarantee earlier passes of the compiler
6445 will adhere to those rules.
6446
6447 So, late in the compilation process we find all the jump tables, and
6448 expand them into real code -- e.g., each entry in the jump table vector
6449 will get an appropriate label followed by a jump to the final target.
6450
6451 Reorg and the final jump pass can then optimize these branches and
6452 fill their delay slots. We end up with smaller, more efficient code.
6453
6454 The jump instructions within the table are special; we must be able
6455 to identify them during assembly output (if the jumps don't get filled
6456 we need to emit a nop rather than nullifying the delay slot). We
6457 identify jumps in switch tables by marking the SET with DImode.
6458
6459 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
6460 insns. This serves two purposes, first it prevents jump.c from
6461 noticing that the last N entries in the table jump to the instruction
6462 immediately after the table and deleting the jumps. Second, those
6463 insns mark where we should emit .begin_brtab and .end_brtab directives
6464 when using GAS (allows for better link time optimizations). */
6465
6466 void
6467 pa_reorg (insns)
6468 rtx insns;
6469 {
6470 rtx insn;
6471
6472 remove_useless_addtr_insns (insns, 1);
6473
6474 if (pa_cpu < PROCESSOR_8000)
6475 pa_combine_instructions (get_insns ());
6476
6477
6478 /* This is fairly cheap, so always run it if optimizing. */
6479 if (optimize > 0 && !TARGET_BIG_SWITCH)
6480 {
6481 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
6482 insns = get_insns ();
6483 for (insn = insns; insn; insn = NEXT_INSN (insn))
6484 {
6485 rtx pattern, tmp, location;
6486 unsigned int length, i;
6487
6488 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
6489 if (GET_CODE (insn) != JUMP_INSN
6490 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
6491 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
6492 continue;
6493
6494 /* Emit marker for the beginning of the branch table. */
6495 emit_insn_before (gen_begin_brtab (), insn);
6496
6497 pattern = PATTERN (insn);
6498 location = PREV_INSN (insn);
6499 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
6500
6501 for (i = 0; i < length; i++)
6502 {
6503 /* Emit a label before each jump to keep jump.c from
6504 removing this code. */
6505 tmp = gen_label_rtx ();
6506 LABEL_NUSES (tmp) = 1;
6507 emit_label_after (tmp, location);
6508 location = NEXT_INSN (location);
6509
6510 if (GET_CODE (pattern) == ADDR_VEC)
6511 {
6512 /* Emit the jump itself. */
6513 tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0));
6514 tmp = emit_jump_insn_after (tmp, location);
6515 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
6516 /* It is easy to rely on the branch table markers
6517 during assembly output to trigger the correct code
6518 for a switch table jump with an unfilled delay slot,
6519
6520 However, that requires state and assumes that we look
6521 at insns in order.
6522
6523 We can't make such assumptions when computing the length
6524 of instructions. Ugh. We could walk the insn chain to
6525 determine if this instruction is in a branch table, but
6526 that can get rather expensive, particularly during the
6527 branch shortening phase of the compiler.
6528
6529 So instead we mark this jump as being special. This is
6530 far from ideal and knows that no code after this will
6531 muck around with the mode of the JUMP_INSN itself. */
6532 PUT_MODE (tmp, SImode);
6533 LABEL_NUSES (JUMP_LABEL (tmp))++;
6534 location = NEXT_INSN (location);
6535 }
6536 else
6537 {
6538 /* Emit the jump itself. */
6539 tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0));
6540 tmp = emit_jump_insn_after (tmp, location);
6541 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
6542 /* It is easy to rely on the branch table markers
6543 during assembly output to trigger the correct code
6544 for a switch table jump with an unfilled delay slot,
6545
6546 However, that requires state and assumes that we look
6547 at insns in order.
6548
6549 We can't make such assumptions when computing the length
6550 of instructions. Ugh. We could walk the insn chain to
6551 determine if this instruction is in a branch table, but
6552 that can get rather expensive, particularly during the
6553 branch shortening phase of the compiler.
6554
6555 So instead we mark this jump as being special. This is
6556 far from ideal and knows that no code after this will
6557 muck around with the mode of the JUMP_INSN itself. */
6558 PUT_MODE (tmp, SImode);
6559 LABEL_NUSES (JUMP_LABEL (tmp))++;
6560 location = NEXT_INSN (location);
6561 }
6562
6563 /* Emit a BARRIER after the jump. */
6564 emit_barrier_after (location);
6565 location = NEXT_INSN (location);
6566 }
6567
6568 /* Emit marker for the end of the branch table. */
6569 emit_insn_before (gen_end_brtab (), location);
6570 location = NEXT_INSN (location);
6571 emit_barrier_after (location);
6572
6573 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
6574 delete_insn (insn);
6575 }
6576 }
6577 else
6578 {
6579 /* Still need begin_brtab and end_brtab insns. */
6580 insns = get_insns ();
6581 for (insn = insns; insn; insn = NEXT_INSN (insn))
6582 {
6583 /* Find an ADDR_VEC insn. */
6584 if (GET_CODE (insn) != JUMP_INSN
6585 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
6586 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
6587 continue;
6588
6589 /* Now generate markers for the beginning and end of the
6590 branch table. */
6591 emit_insn_before (gen_begin_brtab (), insn);
6592 emit_insn_after (gen_end_brtab (), insn);
6593 }
6594 }
6595 }
6596
6597 /* The PA has a number of odd instructions which can perform multiple
6598 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
6599 it may be profitable to combine two instructions into one instruction
6600 with two outputs. It's not profitable on PA2.0 machines because the
6601 two outputs would take two slots in the reorder buffers.
6602
6603 This routine finds instructions which can be combined and combines
6604 them. We only support some of the potential combinations, and we
6605 only try common ways to find suitable instructions.
6606
6607 * addb can add two registers or a register and a small integer
6608 and jump to a nearby (+-8k) location. Normally the jump to the
6609 nearby location is conditional on the result of the add, but by
6610 using the "true" condition we can make the jump unconditional.
6611 Thus addb can perform two independent operations in one insn.
6612
6613 * movb is similar to addb in that it can perform a reg->reg
6614 or small immediate->reg copy and jump to a nearby (+-8k) location.
6615
6616 * fmpyadd and fmpysub can perform a FP multiply and either an
6617 FP add or FP sub if the operands of the multiply and add/sub are
6618 independent (there are other minor restrictions). Note both
6619 the fmpy and fadd/fsub can in theory move to better spots according
6620 to data dependencies, but for now we require the fmpy stay at a
6621 fixed location.
6622
6623 * Many of the memory operations can perform pre & post updates
6624 of index registers. GCC's pre/post increment/decrement addressing
6625 is far too simple to take advantage of all the possibilities. This
6626 pass may not be suitable since those insns may not be independent.
6627
6628 * comclr can compare two ints or an int and a register, nullify
6629 the following instruction and zero some other register. This
6630 is more difficult to use as it's harder to find an insn which
6631 will generate a comclr than finding something like an unconditional
6632 branch. (conditional moves & long branches create comclr insns).
6633
6634 * Most arithmetic operations can conditionally skip the next
6635 instruction. They can be viewed as "perform this operation
6636 and conditionally jump to this nearby location" (where nearby
6637 is one insn away). These are difficult to use due to the
6638 branch length restrictions. */
6639
6640 static void
6641 pa_combine_instructions (insns)
6642 rtx insns ATTRIBUTE_UNUSED;
6643 {
6644 rtx anchor, new;
6645
6646 /* This can get expensive since the basic algorithm is on the
6647 order of O(n^2) (or worse). Only do it for -O2 or higher
6648 levels of optimization. */
6649 if (optimize < 2)
6650 return;
6651
6652 /* Walk down the list of insns looking for "anchor" insns which
6653 may be combined with "floating" insns. As the name implies,
6654 "anchor" instructions don't move, while "floating" insns may
6655 move around. */
6656 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
6657 new = make_insn_raw (new);
6658
6659 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
6660 {
6661 enum attr_pa_combine_type anchor_attr;
6662 enum attr_pa_combine_type floater_attr;
6663
6664 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
6665 Also ignore any special USE insns. */
6666 if ((GET_CODE (anchor) != INSN
6667 && GET_CODE (anchor) != JUMP_INSN
6668 && GET_CODE (anchor) != CALL_INSN)
6669 || GET_CODE (PATTERN (anchor)) == USE
6670 || GET_CODE (PATTERN (anchor)) == CLOBBER
6671 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
6672 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
6673 continue;
6674
6675 anchor_attr = get_attr_pa_combine_type (anchor);
6676 /* See if anchor is an insn suitable for combination. */
6677 if (anchor_attr == PA_COMBINE_TYPE_FMPY
6678 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
6679 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
6680 && ! forward_branch_p (anchor)))
6681 {
6682 rtx floater;
6683
6684 for (floater = PREV_INSN (anchor);
6685 floater;
6686 floater = PREV_INSN (floater))
6687 {
6688 if (GET_CODE (floater) == NOTE
6689 || (GET_CODE (floater) == INSN
6690 && (GET_CODE (PATTERN (floater)) == USE
6691 || GET_CODE (PATTERN (floater)) == CLOBBER)))
6692 continue;
6693
6694 /* Anything except a regular INSN will stop our search. */
6695 if (GET_CODE (floater) != INSN
6696 || GET_CODE (PATTERN (floater)) == ADDR_VEC
6697 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
6698 {
6699 floater = NULL_RTX;
6700 break;
6701 }
6702
6703 /* See if FLOATER is suitable for combination with the
6704 anchor. */
6705 floater_attr = get_attr_pa_combine_type (floater);
6706 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
6707 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
6708 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
6709 && floater_attr == PA_COMBINE_TYPE_FMPY))
6710 {
6711 /* If ANCHOR and FLOATER can be combined, then we're
6712 done with this pass. */
6713 if (pa_can_combine_p (new, anchor, floater, 0,
6714 SET_DEST (PATTERN (floater)),
6715 XEXP (SET_SRC (PATTERN (floater)), 0),
6716 XEXP (SET_SRC (PATTERN (floater)), 1)))
6717 break;
6718 }
6719
6720 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
6721 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
6722 {
6723 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
6724 {
6725 if (pa_can_combine_p (new, anchor, floater, 0,
6726 SET_DEST (PATTERN (floater)),
6727 XEXP (SET_SRC (PATTERN (floater)), 0),
6728 XEXP (SET_SRC (PATTERN (floater)), 1)))
6729 break;
6730 }
6731 else
6732 {
6733 if (pa_can_combine_p (new, anchor, floater, 0,
6734 SET_DEST (PATTERN (floater)),
6735 SET_SRC (PATTERN (floater)),
6736 SET_SRC (PATTERN (floater))))
6737 break;
6738 }
6739 }
6740 }
6741
6742 /* If we didn't find anything on the backwards scan try forwards. */
6743 if (!floater
6744 && (anchor_attr == PA_COMBINE_TYPE_FMPY
6745 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
6746 {
6747 for (floater = anchor; floater; floater = NEXT_INSN (floater))
6748 {
6749 if (GET_CODE (floater) == NOTE
6750 || (GET_CODE (floater) == INSN
6751 && (GET_CODE (PATTERN (floater)) == USE
6752 || GET_CODE (PATTERN (floater)) == CLOBBER)))
6753 continue;
6754
6755
6756 /* Anything except a regular INSN will stop our search. */
6757 if (GET_CODE (floater) != INSN
6758 || GET_CODE (PATTERN (floater)) == ADDR_VEC
6759 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
6760 {
6761 floater = NULL_RTX;
6762 break;
6763 }
6764
6765 /* See if FLOATER is suitable for combination with the
6766 anchor. */
6767 floater_attr = get_attr_pa_combine_type (floater);
6768 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
6769 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
6770 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
6771 && floater_attr == PA_COMBINE_TYPE_FMPY))
6772 {
6773 /* If ANCHOR and FLOATER can be combined, then we're
6774 done with this pass. */
6775 if (pa_can_combine_p (new, anchor, floater, 1,
6776 SET_DEST (PATTERN (floater)),
6777 XEXP (SET_SRC (PATTERN (floater)), 0),
6778 XEXP (SET_SRC (PATTERN (floater)), 1)))
6779 break;
6780 }
6781 }
6782 }
6783
6784 /* FLOATER will be nonzero if we found a suitable floating
6785 insn for combination with ANCHOR. */
6786 if (floater
6787 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
6788 || anchor_attr == PA_COMBINE_TYPE_FMPY))
6789 {
6790 /* Emit the new instruction and delete the old anchor. */
6791 emit_insn_before (gen_rtx_PARALLEL
6792 (VOIDmode,
6793 gen_rtvec (2, PATTERN (anchor),
6794 PATTERN (floater))),
6795 anchor);
6796
6797 PUT_CODE (anchor, NOTE);
6798 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
6799 NOTE_SOURCE_FILE (anchor) = 0;
6800
6801 /* Emit a special USE insn for FLOATER, then delete
6802 the floating insn. */
6803 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
6804 delete_insn (floater);
6805
6806 continue;
6807 }
6808 else if (floater
6809 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
6810 {
6811 rtx temp;
6812 /* Emit the new_jump instruction and delete the old anchor. */
6813 temp
6814 = emit_jump_insn_before (gen_rtx_PARALLEL
6815 (VOIDmode,
6816 gen_rtvec (2, PATTERN (anchor),
6817 PATTERN (floater))),
6818 anchor);
6819
6820 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
6821 PUT_CODE (anchor, NOTE);
6822 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
6823 NOTE_SOURCE_FILE (anchor) = 0;
6824
6825 /* Emit a special USE insn for FLOATER, then delete
6826 the floating insn. */
6827 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
6828 delete_insn (floater);
6829 continue;
6830 }
6831 }
6832 }
6833 }
6834
6835 static int
6836 pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
6837 rtx new, anchor, floater;
6838 int reversed;
6839 rtx dest, src1, src2;
6840 {
6841 int insn_code_number;
6842 rtx start, end;
6843
6844 /* Create a PARALLEL with the patterns of ANCHOR and
6845 FLOATER, try to recognize it, then test constraints
6846 for the resulting pattern.
6847
6848 If the pattern doesn't match or the constraints
6849 aren't met keep searching for a suitable floater
6850 insn. */
6851 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
6852 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
6853 INSN_CODE (new) = -1;
6854 insn_code_number = recog_memoized (new);
6855 if (insn_code_number < 0
6856 || !constrain_operands (1))
6857 return 0;
6858
6859 if (reversed)
6860 {
6861 start = anchor;
6862 end = floater;
6863 }
6864 else
6865 {
6866 start = floater;
6867 end = anchor;
6868 }
6869
6870 /* There are up to three operands to consider: one
6871 output and two inputs.
6872
6873 The output must not be used between FLOATER & ANCHOR
6874 exclusive. The inputs must not be set between
6875 FLOATER and ANCHOR exclusive. */
6876
6877 if (reg_used_between_p (dest, start, end))
6878 return 0;
6879
6880 if (reg_set_between_p (src1, start, end))
6881 return 0;
6882
6883 if (reg_set_between_p (src2, start, end))
6884 return 0;
6885
6886 /* If we get here, then everything is good. */
6887 return 1;
6888 }
6889
6890 /* Return nonzero if references for INSN are delayed.
6891
6892 Millicode insns are actually function calls with some special
6893 constraints on arguments and register usage.
6894
6895 Millicode calls always expect their arguments in the integer argument
6896 registers, and always return their result in %r29 (ret1). They
6897 are expected to clobber their arguments, %r1, %r29, and %r31 and
6898 nothing else.
6899
6900 By considering these effects to be delayed, reorg can put insns
6901 which set the argument registers into the delay slot of the millicode
6902 call -- thus they act more like traditional CALL_INSNs.
6903
6904 get_attr_type will try to recognize the given insn, so make sure to
6905 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
6906 in particular. */
6907 int
6908 insn_refs_are_delayed (insn)
6909 rtx insn;
6910 {
6911 return ((GET_CODE (insn) == INSN
6912 && GET_CODE (PATTERN (insn)) != SEQUENCE
6913 && GET_CODE (PATTERN (insn)) != USE
6914 && GET_CODE (PATTERN (insn)) != CLOBBER
6915 && get_attr_type (insn) == TYPE_MILLI));
6916 }
6917
6918 /* Return the location of a parameter that is passed in a register or NULL
6919 if the parameter has any component that is passed in memory.
6920
6921 This is new code and will be pushed into the net sources after
6922 further testing.
6923
6924 ??? We might want to restructure this so that it looks more like other
6925 ports. */
6926 rtx
6927 function_arg (cum, mode, type, named, incoming)
6928 CUMULATIVE_ARGS *cum;
6929 enum machine_mode mode;
6930 tree type;
6931 int named ATTRIBUTE_UNUSED;
6932 int incoming;
6933 {
6934 int max_arg_words = (TARGET_64BIT ? 8 : 4);
6935 int fpr_reg_base;
6936 int gpr_reg_base;
6937 rtx retval;
6938
6939 if (! TARGET_64BIT)
6940 {
6941 /* If this arg would be passed partially or totally on the stack, then
6942 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
6943 handle arguments which are split between regs and stack slots if
6944 the ABI mandates split arguments. */
6945 if (cum->words + FUNCTION_ARG_SIZE (mode, type) > max_arg_words
6946 || mode == VOIDmode)
6947 return NULL_RTX;
6948 }
6949 else
6950 {
6951 int offset = 0;
6952 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
6953 offset = 1;
6954 if (cum->words + offset >= max_arg_words
6955 || mode == VOIDmode)
6956 return NULL_RTX;
6957 }
6958
6959 /* The 32bit ABIs and the 64bit ABIs are rather different,
6960 particularly in their handling of FP registers. We might
6961 be able to cleverly share code between them, but I'm not
6962 going to bother in the hope that splitting them up results
6963 in code that is more easily understood.
6964
6965 The 64bit code probably is very wrong for structure passing. */
6966 if (TARGET_64BIT)
6967 {
6968 /* Advance the base registers to their current locations.
6969
6970 Remember, gprs grow towards smaller register numbers while
6971 fprs grow to higher register numbers. Also remember FP regs
6972 are always 4 bytes wide, while the size of an integer register
6973 varies based on the size of the target word. */
6974 gpr_reg_base = 26 - cum->words;
6975 fpr_reg_base = 32 + cum->words;
6976
6977 /* If the argument is more than a word long, then we need to align
6978 the base registers. Same caveats as above. */
6979 if (FUNCTION_ARG_SIZE (mode, type) > 1)
6980 {
6981 if (mode != BLKmode)
6982 {
6983 /* First deal with alignment of the doubleword. */
6984 gpr_reg_base -= (cum->words & 1);
6985
6986 /* This seems backwards, but it is what HP specifies. We need
6987 gpr_reg_base to point to the smaller numbered register of
6988 the integer register pair. So if we have an even register
6989 number, then decrement the gpr base. */
6990 gpr_reg_base -= ((gpr_reg_base % 2) == 0);
6991
6992 /* FP values behave sanely, except that each FP reg is only
6993 half of a word. */
6994 fpr_reg_base += ((fpr_reg_base % 2) == 0);
6995 }
6996 else
6997 {
6998 rtx loc[8];
6999 int i, offset = 0, ub;
7000 ub = FUNCTION_ARG_SIZE (mode, type);
7001 ub = MIN (ub,
7002 MAX (0, max_arg_words - cum->words - (cum->words & 1)));
7003 gpr_reg_base -= (cum->words & 1);
7004 for (i = 0; i < ub; i++)
7005 {
7006 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
7007 gen_rtx_REG (DImode,
7008 gpr_reg_base),
7009 GEN_INT (offset));
7010 gpr_reg_base -= 1;
7011 offset += 8;
7012 }
7013 if (ub == 0)
7014 return NULL_RTX;
7015 else if (ub == 1)
7016 return XEXP (loc[0], 0);
7017 else
7018 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
7019 }
7020 }
7021 }
7022 else
7023 {
7024 /* If the argument is larger than a word, then we know precisely
7025 which registers we must use. */
7026 if (FUNCTION_ARG_SIZE (mode, type) > 1)
7027 {
7028 if (cum->words)
7029 {
7030 gpr_reg_base = 23;
7031 fpr_reg_base = 38;
7032 }
7033 else
7034 {
7035 gpr_reg_base = 25;
7036 fpr_reg_base = 34;
7037 }
7038 }
7039 else
7040 {
7041 /* We have a single word (32 bits). A simple computation
7042 will get us the register #s we need. */
7043 gpr_reg_base = 26 - cum->words;
7044 fpr_reg_base = 32 + 2 * cum->words;
7045 }
7046 }
7047
7048 if (TARGET_64BIT && mode == TFmode)
7049 {
7050 return
7051 gen_rtx_PARALLEL
7052 (mode,
7053 gen_rtvec (2,
7054 gen_rtx_EXPR_LIST (VOIDmode,
7055 gen_rtx_REG (DImode, gpr_reg_base + 1),
7056 const0_rtx),
7057 gen_rtx_EXPR_LIST (VOIDmode,
7058 gen_rtx_REG (DImode, gpr_reg_base),
7059 GEN_INT (8))));
7060 }
7061 /* Determine if the argument needs to be passed in both general and
7062 floating point registers. */
7063 if ((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
7064 /* If we are doing soft-float with portable runtime, then there
7065 is no need to worry about FP regs. */
7066 && ! TARGET_SOFT_FLOAT
7067 /* The parameter must be some kind of float, else we can just
7068 pass it in integer registers. */
7069 && FLOAT_MODE_P (mode)
7070 /* The target function must not have a prototype. */
7071 && cum->nargs_prototype <= 0
7072 /* libcalls do not need to pass items in both FP and general
7073 registers. */
7074 && type != NULL_TREE
7075 /* All this hair applies to outgoing args only. */
7076 && !incoming)
7077 {
7078 retval
7079 = gen_rtx_PARALLEL
7080 (mode,
7081 gen_rtvec (2,
7082 gen_rtx_EXPR_LIST (VOIDmode,
7083 gen_rtx_REG (mode, fpr_reg_base),
7084 const0_rtx),
7085 gen_rtx_EXPR_LIST (VOIDmode,
7086 gen_rtx_REG (mode, gpr_reg_base),
7087 const0_rtx)));
7088 }
7089 else
7090 {
7091 /* See if we should pass this parameter in a general register. */
7092 if (TARGET_SOFT_FLOAT
7093 /* Indirect calls in the normal 32bit ABI require all arguments
7094 to be passed in general registers. */
7095 || (!TARGET_PORTABLE_RUNTIME
7096 && !TARGET_64BIT
7097 && cum->indirect)
7098 /* If the parameter is not a floating point parameter, then
7099 it belongs in GPRs. */
7100 || !FLOAT_MODE_P (mode))
7101 retval = gen_rtx_REG (mode, gpr_reg_base);
7102 else
7103 retval = gen_rtx_REG (mode, fpr_reg_base);
7104 }
7105 return retval;
7106 }
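
/* A worked sketch of the single-word 32-bit case above: argument
   word N is passed in general register 26 - N (%r26 down to %r23)
   and, for FP values, in FP hard regno 32 + 2N (the FP argument
   registers 32..39 also handled by the copy loop in output_call).
   Illustrative helper only:  */

static void
pa32_arg_regnos_sketch (words, gpr_regno, fpr_regno)
     int words;
     int *gpr_regno, *fpr_regno;
{
  /* Mirrors gpr_reg_base/fpr_reg_base for a one-word argument.  */
  *gpr_regno = 26 - words;	/* word 0 -> %r26, word 3 -> %r23 */
  *fpr_regno = 32 + 2 * words;	/* word 0 -> regno 32, word 3 -> 38 */
}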
7107
7108
7109 /* If this arg would be passed totally in registers or totally on the stack,
7110 then this routine should return zero. It is currently called only for
7111 the 64-bit target. */
7112 int
7113 function_arg_partial_nregs (cum, mode, type, named)
7114 CUMULATIVE_ARGS *cum;
7115 enum machine_mode mode;
7116 tree type;
7117 int named ATTRIBUTE_UNUSED;
7118 {
7119 unsigned int max_arg_words = 8;
7120 unsigned int offset = 0;
7121
7122 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
7123 offset = 1;
7124
7125 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
7126 /* Arg fits fully into registers. */
7127 return 0;
7128 else if (cum->words + offset >= max_arg_words)
7129 /* Arg fully on the stack. */
7130 return 0;
7131 else
7132 /* Arg is split. */
7133 return max_arg_words - cum->words - offset;
7134
7135 }
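
/* A standalone model of the three-way split above, assuming no
   doubleword alignment padding (offset == 0).  For example, with
   6 argument words already used and a 4-word argument, 2 words go
   in registers and the remainder goes on the stack.  Illustrative
   only:  */

static unsigned int
partial_nregs_sketch (words_used, arg_words)
     unsigned int words_used, arg_words;
{
  unsigned int max_arg_words = 8;	/* the 64-bit runtime */

  if (words_used + arg_words <= max_arg_words)
    return 0;				/* fits fully in registers */
  if (words_used >= max_arg_words)
    return 0;				/* lives fully on the stack */
  return max_arg_words - words_used;	/* words passed in registers */
}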
7136
7137
7138 /* Return 1 if this is a comparison operator. This allows the use of
7139 MATCH_OPERATOR to recognize all the branch insns. */
7140
7141 int
7142 cmpib_comparison_operator (op, mode)
7143 register rtx op;
7144 enum machine_mode mode;
7145 {
7146 return ((mode == VOIDmode || GET_MODE (op) == mode)
7147 && (GET_CODE (op) == EQ
7148 || GET_CODE (op) == NE
7149 || GET_CODE (op) == GT
7150 || GET_CODE (op) == GTU
7151 || GET_CODE (op) == GE
7152 || GET_CODE (op) == LT
7153 || GET_CODE (op) == LE
7154 || GET_CODE (op) == LEU));
7155 }
7156
7157 /* Mark ARG (which is really a struct deferred_plabel **) for GC. */
7158
7159 static void
7160 mark_deferred_plabels (arg)
7161 void *arg;
7162 {
7163 struct deferred_plabel *dp = *(struct deferred_plabel **) arg;
7164 int i;
7165
7166 for (i = 0; i < n_deferred_plabels; ++i)
7167 ggc_mark_rtx (dp[i].internal_label);
7168 }
7169
7170 /* Called to register all of our global variables with the garbage
7171 collector. */
7172
7173 static void
7174 pa_add_gc_roots ()
7175 {
7176 ggc_add_rtx_root (&hppa_compare_op0, 1);
7177 ggc_add_rtx_root (&hppa_compare_op1, 1);
7178 ggc_add_root (&deferred_plabels, 1, sizeof (&deferred_plabels),
7179 &mark_deferred_plabels);
7180 }