/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include <stdio.h>
#include "config.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "reload.h"
#include "c-tree.h"
#include "expr.h"
#include "obstack.h"

/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */

rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu;

/* String to hold which cpu we are scheduling for.  */
char *pa_cpu_string;

/* Set by the FUNCTION_PROFILER macro.  */
int hp_profile_labelno;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Whether or not the current function uses an out-of-line prologue
   and epilogue.  */
static int out_of_line_prologue_epilogue;

static rtx find_addr_reg ();

/* Keep track of the number of bytes we have output in the CODE subspaces
   during this compilation so we'll know when to emit inline long-calls.  */

unsigned int total_code_bytes;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */

struct defer_plab
{
  rtx internal_label;
  rtx symbol;
} *deferred_plabels = 0;
int n_deferred_plabels = 0;

void
override_options ()
{
  /* Default to 7100 scheduling.  If the 7100LC scheduling ever
     gets reasonably tuned, it should be the default, since that's
     what most PAs sold now are.  */
  if (pa_cpu_string == NULL
      || ! strcmp (pa_cpu_string, "7100"))
    {
      pa_cpu_string = "7100";
      pa_cpu = PROCESSOR_7100;
    }
  else if (! strcmp (pa_cpu_string, "700"))
    {
      pa_cpu_string = "700";
      pa_cpu = PROCESSOR_700;
    }
  else if (! strcmp (pa_cpu_string, "7100LC"))
    {
      pa_cpu_string = "7100LC";
      pa_cpu = PROCESSOR_7100LC;
    }
  else
    {
      warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100 and 7100LC\n", pa_cpu_string);
    }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning ("PIC code generation is not supported in the portable runtime model\n");
    }

  if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS))
    {
      warning ("PIC code generation is not compatible with fast indirect calls\n");
    }

  if (flag_pic && profile_flag)
    {
      warning ("PIC code generation is not compatible with profiling\n");
    }

  if (TARGET_SPACE && (flag_pic || profile_flag))
    {
      warning ("Out of line entry/exit sequences are not compatible\n");
      warning ("with PIC or profiling\n");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning ("-g is only supported when using GAS on this processor,");
      warning ("-g option disabled.");
      write_symbols = NO_DEBUG;
    }
}


/* Return non-zero only if OP is a register of mode MODE,
   or CONST0_RTX.  */
int
reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == CONST0_RTX (mode) || register_operand (op, mode));
}

/* Return non-zero if OP is suitable for use in a call to a named
   function.

   (???) For 2.5 try to eliminate either call_operand_address or
   function_label_operand; they perform very similar functions.  */
int
call_operand_address (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (x)
     register rtx x;
{

  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
               || GET_CODE (XEXP (op, 0)) == LABEL_REF)
              && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

/* Return truth value of statement that OP is a symbolic memory
   operand of mode MODE.  */

int
symbolic_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);
  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
          || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
}

/* Return 1 if the operand is either a register or a memory operand that is
   not symbolic.  */

int
reg_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

/* Return 1 if the operand is either a register, zero, or a memory operand
   that is not symbolic.  */

int
reg_or_0_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (op == CONST0_RTX (mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (intval)
     HOST_WIDE_INT intval;
{
  /* OK if ldo, ldil, or zdepi can be used.  */
  return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0
          || zdepi_cint_p (intval));
}
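
/* Illustration: 0x1234 fits 14 bits and is a single "ldi"; 0x12345800
   has its low 11 bits clear and is a single "ldil"; 0x00ff0000 passes
   zdepi_cint_p (see below) and is a single "zdepi".  A value like
   0x12345 fails all three tests and needs an ldil/ldo pair.  */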

/* Accept anything that can be moved in one instruction into a general
   register.  */
int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;

  op = XEXP (op, 0);
  if (GET_CODE (op) == LO_SUM)
    return (register_operand (XEXP (op, 0), Pmode)
            && CONSTANT_P (XEXP (op, 1)));

  /* Since move_operand is only used for source operands, we can always
     allow scaled indexing!  */
  if (GET_CODE (op) == PLUS
      && ((GET_CODE (XEXP (op, 0)) == MULT
           && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
           && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
           && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode)
           && GET_CODE (XEXP (op, 1)) == REG)
          || (GET_CODE (XEXP (op, 1)) == MULT
              && GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
              && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
              && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode)
              && GET_CODE (XEXP (op, 0)) == REG)))
    return 1;

  return memory_address_p (mode, op);
}

/* Accept REG and any CONST_INT that can be moved in one instruction into a
   general register.  */
int
reg_or_cint_move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  return 0;
}

int
pic_label_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!flag_pic)
    return 0;

  switch (GET_CODE (op))
    {
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return (GET_CODE (XEXP (op, 0)) == LABEL_REF
              && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

int
fp_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return reg_renumber && FP_REG_P (op);
}

\f

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 14-bit signed integers.  */
int
arith_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
}

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 11-bit signed integers.  */
int
arith11_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
}

/* A constant integer suitable for use in a PRE_MODIFY memory
   reference.  */
int
pre_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT
          && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
}

/* A constant integer suitable for use in a POST_MODIFY memory
   reference.  */
int
post_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT
          && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
}

int
arith_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_DOUBLE
              && GET_MODE (op) == mode
              && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
              && (CONST_DOUBLE_HIGH (op) >= 0
                  == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns, or
   is an integer register.  */

int
ireg_or_int5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
          || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns.  */

int
int5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
}

int
uint5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
}

int
int11_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
}

int
uint32_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
#else
  return (GET_CODE (op) == CONST_INT
          || (GET_CODE (op) == CONST_DOUBLE
              && CONST_DOUBLE_HIGH (op) == 0));
#endif
}

int
arith5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || int5_operand (op, mode);
}

/* True iff zdepi can be used to generate this CONST_INT.  */
int
zdepi_cint_p (x)
     unsigned HOST_WIDE_INT x;
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
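
/* Illustration: for x = 0x00ff0000, lsb_mask = 0x00010000 and
   t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1) = 0x00100000, a power of
   two, so the test succeeds (0x00ff0000 is "zdepi -1,15,8").  For
   x = 0x51 (binary 1010001), t = 6, not a power of two, and no zdepi
   immediate can produce it.  */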

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
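
/* Illustration: for mask = 0xfffffc0f (pattern 1..10..01..1), ~mask is
   the single run 0x000003f0; adding its lowest set bit (0x10) carries
   through the run and leaves the power of two 0x400, so the test
   succeeds.  For mask = 0x5, ~mask contains two separate runs and the
   sum is not a power of two.  */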

/* True iff depi or extru can be used to compute (reg & OP).  */
int
and_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
}

/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
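
/* Illustration: mask = 0x000001f8 is a single run of ones; adding its
   lowest set bit (0x8) gives 0x200, a power of two, so depi can set
   those bits.  mask = 0x109 has two runs and is rejected.  */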

/* True iff depi can be used to compute (reg | OP).  */
int
ior_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
}

int
lhs_lshift_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
}

/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
   Such values can be the left hand side x in (x << r), using the zvdepi
   instruction.  */
int
lhs_lshift_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  unsigned HOST_WIDE_INT x;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  x = INTVAL (op) >> 4;
  return (x & (x + 1)) == 0;
}
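
/* Illustration: op = 0x3f5 gives x = 0x3f, and x & (x + 1) == 0, so
   everything above the low four bits is a contiguous low-order run of
   ones and zvdepi applies.  op = 0x115 gives x = 0x11, where
   0x11 & 0x12 != 0, so it is rejected.  */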

int
arith32_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
}

int
pc_or_label_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
}
\f
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig, reg;
     enum machine_mode mode;
{
  rtx pic_ref = orig;

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      emit_insn (gen_pic_load_label (reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      if (reg == 0)
        abort ();

      if (flag_pic == 2)
        {
          emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig));
          pic_ref = gen_rtx (MEM, Pmode,
                             gen_rtx (LO_SUM, Pmode, reg,
                                      gen_rtx (UNSPEC, SImode,
                                               gen_rtvec (1, orig), 0)));
        }
      else
        pic_ref = gen_rtx (MEM, Pmode,
                           gen_rtx (PLUS, Pmode, pic_offset_table_rtx, orig));
      current_function_uses_pic_offset_table = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      emit_move_insn (reg, pic_ref);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      if (reg == 0)
        abort ();

      if (GET_CODE (XEXP (orig, 0)) == PLUS)
        {
          base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
          orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                         base == reg ? 0 : reg);
        }
      else abort ();
      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant_for_output (base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx (PLUS, Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }
  return pic_ref;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine can not
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
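
/* Worked example of the transformation above: for a MODE_INT reference
   to (X + 0x12345), mask is 0x3fff; 0x12345 & 0x3fff = 0x2345, which is
   >= 0x2000, so we round up to Y = 0x14000, compute Z = X + 0x14000
   once, and rewrite the reference as memory (Z + -0x1cbb), whose
   displacement fits in ldo's 14-bit field.  */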

rtx
hppa_legitimize_address (x, oldx, mode)
     rtx x, oldx;
     enum machine_mode mode;
{
  rtx orig = x;

  if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (SImode, XEXP (x, 1));
      return force_reg (SImode, gen_rtx (PLUS, SImode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff;

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = gen_rtx (CONST, VOIDmode,
                                    gen_rtx (PLUS, Pmode,
                                             XEXP (x, 0),
                                             GEN_INT (newoffset)));
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx (HIGH, Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx (LO_SUM, Pmode,
                                  tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx (PLUS, Pmode,
                                        force_reg (Pmode, XEXP (x, 0)),
                                        int_part));
        }
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx (PLUS, Pmode,
                                        gen_rtx (MULT, Pmode,
                                                 reg2, GEN_INT (val)),
                                        reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll
         know it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REGNO_POINTER_FLAG (REGNO (reg1)))
        {
          base = reg1;
          orig_base = XEXP (XEXP (x, 0), 1);
          idx = gen_rtx (PLUS, Pmode,
                         gen_rtx (MULT, Pmode,
                                  XEXP (XEXP (XEXP (x, 0), 0), 0),
                                  XEXP (XEXP (XEXP (x, 0), 0), 1)),
                         XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REGNO_POINTER_FLAG (REGNO (reg2)))
        {
          base = reg2;
          orig_base = XEXP (x, 1);
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));

          val /= INTVAL (XEXP (XEXP (idx, 0), 1));
          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return force_reg (Pmode, gen_rtx (PLUS, Pmode,
                                            gen_rtx (MULT, Pmode, reg1,
                                                     XEXP (XEXP (idx, 0), 1)),
                                            base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          int val = INTVAL (XEXP (XEXP (idx, 0), 1));
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != REG)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode, gen_rtx (PLUS, Pmode,
                                            gen_rtx (MULT, Pmode,
                                                     reg2, GEN_INT (val)),
                                            reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode,
                                        gen_rtx (MULT, Pmode, reg1,
                                                 XEXP (XEXP (idx, 0), 1)),
                                        reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx (PLUS, Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange
     the terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
               (plus (mult (reg) (shadd_const))
                     (const (plus (symbol_ref) (const_int))))

             where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big but can be divided evenly by shadd_const,
             divide it and add the quotient to (reg); this allows more
             scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && GET_CODE (XEXP (x, 0)) == MULT
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
              && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              int val = INTVAL (XEXP (XEXP (x, 0), 1));
              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return force_reg (Pmode, gen_rtx (PLUS, Pmode,
                                                gen_rtx (MULT, Pmode,
                                                         reg2, GEN_INT (val)),
                                                reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
                   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx (GET_CODE (y), Pmode,
                                                 regx2, regx1));
              return force_reg (Pmode,
                                gen_rtx (PLUS, Pmode,
                                         gen_rtx (MULT, Pmode, regx2,
                                                  XEXP (XEXP (x, 0), 1)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx (GET_CODE (y), Pmode, regx1, regy2));
              return force_reg (Pmode, gen_rtx (PLUS, Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}

/* For the HPPA, REG+REG and REG+CONST addresses (PLUS and LO_SUM)
   cost 1, a bare HIGH costs 2, and anything else costs 4.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */
int
hppa_address_cost (X)
     rtx X;
{
  if (GET_CODE (X) == PLUS)
    return 1;
  else if (GET_CODE (X) == LO_SUM)
    return 1;
  else if (GET_CODE (X) == HIGH)
    return 2;
  return 4;
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.  */

int
emit_move_sequence (operands, mode, scratch_reg)
     rtx *operands;
     enum machine_mode mode;
     rtx scratch_reg;
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];

  if (reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))];
      operand0 = alter_subreg (operand0);
    }

  if (reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))];
      operand1 = alter_subreg (operand1);
    }

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 bits, including
     (subreg (mem (addr))) cases.  */
  if (fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
           && ! memory_address_p (DFmode, XEXP (operand1, 0)))
          || ((GET_CODE (operand1) == SUBREG
               && GET_CODE (XEXP (operand1, 0)) == MEM
               && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
      && scratch_reg)
    {
      if (GET_CODE (operand1) == SUBREG)
        operand1 = XEXP (operand1, 0);

      scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (SImode, XEXP (operand1, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
                                                SImode,
                                                XEXP (XEXP (operand1, 0), 0),
                                                scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx (SET, VOIDmode, operand0, gen_rtx (MEM, mode,
                                                            scratch_reg)));
      return 1;
    }
  else if (fp_reg_operand (operand1, mode)
           && ((GET_CODE (operand0) == MEM
                && ! memory_address_p (DFmode, XEXP (operand0, 0)))
               || ((GET_CODE (operand0) == SUBREG)
                   && GET_CODE (XEXP (operand0, 0)) == MEM
                   && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
           && scratch_reg)
    {
      if (GET_CODE (operand0) == SUBREG)
        operand0 = XEXP (operand0, 0);

      scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (SImode, XEXP (operand0, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand0, 0)),
                                                SImode,
                                                XEXP (XEXP (operand0, 0), 0),
                                                scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (MEM, mode, scratch_reg),
                          operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.  Fix this for 2.5.  */
  else if (fp_reg_operand (operand0, mode)
           && CONSTANT_P (operand1)
           && scratch_reg)
    {
      rtx xoperands[2];

      /* Force the constant into memory and put the address of the
         memory location into scratch_reg.  */
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx (SET, mode, operand0,
                          gen_rtx (MEM, mode, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, from a FP register, or with a constant.  */
  else if (GET_CODE (operand0) == REG
           && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
           && (GET_CODE (operand1) == MEM
               || GET_CODE (operand1) == CONST_INT
               || (GET_CODE (operand1) == REG
                   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
           && scratch_reg)
    {
      emit_move_insn (scratch_reg, operand1);
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
          || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
          || (operand1 == CONST0_RTX (mode))
          || (GET_CODE (operand1) == HIGH
              && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
          /* Only `general_operands' can come here, so MEM is ok.  */
          || GET_CODE (operand1) == MEM)
        {
          /* Run this case quickly.  */
          emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
          return 1;
        }
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
        {
          /* Run this case quickly.  */
          emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
          return 1;
        }
      if (! (reload_in_progress || reload_completed))
        {
          operands[0] = validize_mem (operand0);
          operands[1] = operand1 = force_reg (mode, operand1);
        }
    }

  /* Simplify the source if we need to.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || (GET_CODE (operand1) == HIGH
          && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
        {
          ishighonly = 1;
          operand1 = XEXP (operand1, 0);
        }
      if (symbolic_operand (operand1, mode))
        {
          rtx const_part = NULL;

          /* Argh.  The assembler and linker can't handle arithmetic
             involving plabels.  We'll have to split up operand1 here
             if it's a function label involved in an arithmetic
             expression.  Luckily, this only happens with addition
             of constants to plabels, which simplifies the test.

             We add the constant back in just before returning to
             our caller.  */
          if (GET_CODE (operand1) == CONST
              && GET_CODE (XEXP (operand1, 0)) == PLUS
              && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
            {
              /* Save away the constant part of the expression.  */
              const_part = XEXP (XEXP (operand1, 0), 1);
              if (GET_CODE (const_part) != CONST_INT)
                abort ();

              /* Set operand1 to just the SYMBOL_REF.  */
              operand1 = XEXP (XEXP (operand1, 0), 0);
            }

          if (flag_pic)
            {
              rtx temp;

              if (reload_in_progress || reload_completed)
                temp = scratch_reg ? scratch_reg : operand0;
              else
                temp = gen_reg_rtx (Pmode);

              /* If operand1 is a function label, then we've got to
                 force it to memory, then load op0 from memory.  */
              if (function_label_operand (operand1, mode))
                {
                  operands[1] = force_const_mem (mode, operand1);
                  emit_move_sequence (operands, mode, temp);
                }
              /* Likewise for (const (plus (symbol) (const_int))) when
                 generating pic code during or after reload and const_int
                 will not fit in 14 bits.  */
              else if (GET_CODE (operand1) == CONST
                       && GET_CODE (XEXP (operand1, 0)) == PLUS
                       && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
                       && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
                       && (reload_completed || reload_in_progress)
                       && flag_pic)
                {
                  operands[1] = force_const_mem (mode, operand1);
                  operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
                                                        mode, temp);
                  emit_move_sequence (operands, mode, temp);
                }
              else
                {
                  operands[1] = legitimize_pic_address (operand1, mode, temp);
                  emit_insn (gen_rtx (SET, VOIDmode, operand0, operands[1]));
                }
            }
          /* On the HPPA, references to data space are supposed to use dp,
             register 27, but showing it in the RTL inhibits various cse
             and loop optimizations.  */
          else
            {
              rtx temp, set;

              if (reload_in_progress || reload_completed)
                temp = scratch_reg ? scratch_reg : operand0;
              else
                temp = gen_reg_rtx (mode);

              /* Loading a SYMBOL_REF into a register makes that register
                 safe to be used as the base in an indexed address.

                 Don't mark hard registers though.  That loses.  */
              if (GET_CODE (operand0) == REG
                  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
                REGNO_POINTER_FLAG (REGNO (operand0)) = 1;
              if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
                REGNO_POINTER_FLAG (REGNO (temp)) = 1;
              if (ishighonly)
                set = gen_rtx (SET, mode, operand0, temp);
              else
                set = gen_rtx (SET, VOIDmode,
                               operand0,
                               gen_rtx (LO_SUM, mode, temp, operand1));

              emit_insn (gen_rtx (SET, VOIDmode,
                                  temp,
                                  gen_rtx (HIGH, mode, operand1)));
              emit_insn (set);

            }

          /* Add back in the constant part if needed.  */
          if (const_part != NULL)
            expand_inc (operand0, const_part);
          return 1;
        }
      else if (GET_CODE (operand1) != CONST_INT
               || ! cint_ok_for_move (INTVAL (operand1)))
        {
          rtx temp;

          if (reload_in_progress || reload_completed)
            temp = operand0;
          else
            temp = gen_reg_rtx (mode);

          emit_insn (gen_rtx (SET, VOIDmode, temp,
                              gen_rtx (HIGH, mode, operand1)));
          operands[1] = gen_rtx (LO_SUM, mode, temp, operand1);
        }
    }
  /* Now have insn-emit do whatever it normally does.  */
  return 0;
}

/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
   it will need a link/runtime reloc).  */

int
reloc_needed (exp)
     tree exp;
{
  int reloc = 0;

  switch (TREE_CODE (exp))
    {
    case ADDR_EXPR:
      return 1;

    case PLUS_EXPR:
    case MINUS_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      reloc |= reloc_needed (TREE_OPERAND (exp, 1));
      break;

    case NOP_EXPR:
    case CONVERT_EXPR:
    case NON_LVALUE_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      break;

    case CONSTRUCTOR:
      {
        register tree link;
        for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
          if (TREE_VALUE (link) != 0)
            reloc |= reloc_needed (TREE_VALUE (link));
      }
      break;

    case ERROR_MARK:
      break;
    }
  return reloc;
}

/* Does operand (which is a symbolic_operand) live in text space?  If
   so, SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true.  */

int
read_only_operand (operand)
     rtx operand;
{
  if (GET_CODE (operand) == CONST)
    operand = XEXP (XEXP (operand, 0), 0);
  if (flag_pic)
    {
      if (GET_CODE (operand) == SYMBOL_REF)
        return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
    }
  else
    {
      if (GET_CODE (operand) == SYMBOL_REF)
        return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
    }
  return 1;
}

\f
/* Return the best assembler insn template
   for moving operands[1] into operands[0] as a fullword.  */
char *
singlemove_string (operands)
     rtx *operands;
{
  HOST_WIDE_INT intval;

  if (GET_CODE (operands[0]) == MEM)
    return "stw %r1,%0";
  if (GET_CODE (operands[1]) == MEM)
    return "ldw %1,%0";
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      long i;
      REAL_VALUE_TYPE d;

      if (GET_MODE (operands[1]) != SFmode)
        abort ();

      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
         bit pattern.  */
      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
      REAL_VALUE_TO_TARGET_SINGLE (d, i);

      operands[1] = GEN_INT (i);
      /* Fall through to CONST_INT case.  */
    }
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      intval = INTVAL (operands[1]);

      if (VAL_14_BITS_P (intval))
        return "ldi %1,%0";
      else if ((intval & 0x7ff) == 0)
        return "ldil L'%1,%0";
      else if (zdepi_cint_p (intval))
        return "zdepi %Z1,%0";
      else
        return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
    }
  return "copy %1,%0";
}
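
/* Illustration (assuming %Z prints the imm,pos,len triple computed by
   compute_zdepi_operands below): 0x1234 comes out as "ldi", 0x12345800
   as "ldil", 0x00ff0000 as "zdepi -1,15,8", and 0x12345 needs the
   two-insn ldil/ldo sequence.  */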
\f

/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the zdepi
   instructions.  Store the immediate value to insert in OP[0].  */
void
compute_zdepi_operands (imm, op)
     unsigned HOST_WIDE_INT imm;
     unsigned *op;
{
  int lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < 32; lsb++)
    {
      if ((imm & 1) != 0)
        break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= 28) ? 4 : 32 - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < 32; len++)
        {
          if ((imm & (1 << len)) == 0)
            break;
        }

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 31 - lsb;
  op[2] = len;
}
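
/* Worked example: imm = 0x00ff0000 has lsb = 16; after the shift the
   low five bits are 0x1f, so the sign bit of the field is set, len
   scans out to 8, and imm sign-extends to -1.  The result is
   op = {-1, 15, 8}, i.e. "zdepi -1,15,8", which deposits eight ones
   ending at PA bit 15 (value bit 16), reconstructing 0x00ff0000.  */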

/* Output assembler code to perform a doubleword move insn
   with operands OPERANDS.  */

char *
output_move_double (operands)
     rtx *operands;
{
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  rtx latehalf[2];
  rtx addreg0 = 0, addreg1 = 0;

  /* First classify both operands.  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;

  /* Check for the cases that the operand constraints are not
     supposed to allow to happen.  Abort if we get one,
     because generating code for these cases is painful.  */

  if (optype0 != REGOP && optype1 != REGOP)
    abort ();

  /* Handle auto decrementing and incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Do this better once the
     port understands the general inc/dec addressing of the PA.
     (This was written by tege.  Chide him if it doesn't work.)  */

  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
         doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
        {
          rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);

          operands[0] = XEXP (addr, 0);
          if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
            abort ();

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              /* No overlap between high target register and address
                 register.  (We do this in a non-obvious way to
                 save a register file writeback.)  */
              if (GET_CODE (addr) == POST_INC)
                return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)";
              return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)";
            }
          else
            abort ();
        }
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
        {
          rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);

          operands[0] = XEXP (addr, 0);
          if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
            abort ();

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              /* No overlap between high target register and address
                 register.  (We do this in a non-obvious way to
                 save a register file writeback.)  */
              if (GET_CODE (addr) == PRE_INC)
                return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)";
              return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)";
            }
          else
            abort ();
        }
    }
  if (optype1 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
         doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[1], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
        {
          rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);

          operands[1] = XEXP (addr, 0);
          if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
            abort ();

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              /* No overlap between high target register and address
                 register.  (We do this in a non-obvious way to
                 save a register file writeback.)  */
              if (GET_CODE (addr) == POST_INC)
                return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0";
              return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0";
            }
          else
            {
              /* This is an undefined situation.  We should load into the
                 address register *and* update that register.  Probably
                 we don't need to handle this at all.  */
              if (GET_CODE (addr) == POST_INC)
                return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0";
              return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0";
            }
        }
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
        {
          rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);

          operands[1] = XEXP (addr, 0);
          if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
            abort ();

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              /* No overlap between high target register and address
                 register.  (We do this in a non-obvious way to
                 save a register file writeback.)  */
              if (GET_CODE (addr) == PRE_INC)
                return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0";
              return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0";
            }
          else
            {
              /* This is an undefined situation.  We should load into the
                 address register *and* update that register.  Probably
                 we don't need to handle this at all.  */
              if (GET_CODE (addr) == PRE_INC)
                return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0";
              return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0";
            }
        }
    }

  /* If an operand is an unoffsettable memory ref, find a register
     we can increment temporarily to make it refer to the second word.  */

  if (optype0 == MEMOP)
    addreg0 = find_addr_reg (XEXP (operands[0], 0));

  if (optype1 == MEMOP)
    addreg1 = find_addr_reg (XEXP (operands[1], 0));

  /* Ok, we can do one word at a time.
     Normally we do the low-numbered word first.

     In either case, set up in LATEHALF the operands to use
     for the high-numbered word and in some cases alter the
     operands in OPERANDS to be suitable for the low-numbered word.  */

  if (optype0 == REGOP)
    latehalf[0] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
  else if (optype0 == OFFSOP)
    latehalf[0] = adj_offsettable_operand (operands[0], 4);
  else
    latehalf[0] = operands[0];

  if (optype1 == REGOP)
    latehalf[1] = gen_rtx (REG, SImode, REGNO (operands[1]) + 1);
  else if (optype1 == OFFSOP)
    latehalf[1] = adj_offsettable_operand (operands[1], 4);
  else if (optype1 == CNSTOP)
    split_double (operands[1], &operands[1], &latehalf[1]);
  else
    latehalf[1] = operands[1];

  /* If the first move would clobber the source of the second one,
     do them in the other order.

     This can happen in two cases:

        mem -> register where the first half of the destination register
        is the same register used in the memory's address.  Reload
        can create such insns.

        mem in this case will be either register indirect or register
        indirect plus a valid offset.

        register -> register move where REGNO(dst) == REGNO(src + 1);
        someone (Tim/Tege?) claimed this can happen for parameter loads.

     Handle mem -> register case first.  */
  if (optype0 == REGOP
      && (optype1 == MEMOP || optype1 == OFFSOP)
      && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
                            operands[1], 0))
    {
      /* Do the late half first.  */
      if (addreg1)
        output_asm_insn ("ldo 4(%0),%0", &addreg1);
      output_asm_insn (singlemove_string (latehalf), latehalf);

      /* Then clobber.  */
      if (addreg1)
        output_asm_insn ("ldo -4(%0),%0", &addreg1);
      return singlemove_string (operands);
    }

  /* Now handle register -> register case.  */
  if (optype0 == REGOP && optype1 == REGOP
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    {
      output_asm_insn (singlemove_string (latehalf), latehalf);
      return singlemove_string (operands);
    }

  /* Normal case: do the two words, low-numbered first.  */

  output_asm_insn (singlemove_string (operands), operands);

  /* Make any unoffsettable addresses point at high-numbered word.  */
  if (addreg0)
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo 4(%0),%0", &addreg1);

  /* Do that word.  */
  output_asm_insn (singlemove_string (latehalf), latehalf);

  /* Undo the adds we just did.  */
  if (addreg0)
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo -4(%0),%0", &addreg1);

  return "";
}
\f
char *
output_fp_move_double (operands)
     rtx *operands;
{
  if (FP_REG_P (operands[0]))
    {
      if (FP_REG_P (operands[1])
          || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
        output_asm_insn ("fcpy,dbl %r1,%0", operands);
      else
        output_asm_insn ("fldd%F1 %1,%0", operands);
    }
  else if (FP_REG_P (operands[1]))
    {
      output_asm_insn ("fstd%F0 %1,%0", operands);
    }
  else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
    {
      if (GET_CODE (operands[0]) == REG)
        {
          rtx xoperands[2];
          xoperands[1] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
          xoperands[0] = operands[0];
          output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
        }
      /* This is a pain.  You have to be prepared to deal with an
         arbitrary address here including pre/post increment/decrement.

         So avoid this in the MD.  */
      else
        abort ();
    }
  else abort ();
  return "";
}
\f
/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.  */

static rtx
find_addr_reg (addr)
     rtx addr;
{
  while (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
        addr = XEXP (addr, 0);
      else if (GET_CODE (XEXP (addr, 1)) == REG)
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
        addr = XEXP (addr, 0);
      else
        abort ();
    }
  if (GET_CODE (addr) == REG)
    return addr;
  abort ();
}

/* Emit code to perform a block move.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is the source pointer as a REG, clobbered.
   OPERANDS[2] is a register for temporary storage.
   OPERANDS[3] is a register for temporary storage.
   OPERANDS[4] is the size as a CONST_INT.
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.  */

char *
output_block_move (operands, size_is_constant)
     rtx *operands;
     int size_is_constant;
{
  int align = INTVAL (operands[5]);
  unsigned long n_bytes = INTVAL (operands[4]);

  /* We can't move more than four bytes at a time because the PA
     has no integer move insns wider than a word.  (Could use fp
     mem ops?)  */
  if (align > 4)
    align = 4;

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
    case 4:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 8);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  */
      output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
      output_asm_insn ("ldws,ma 4(0,%1),%6", operands);
      output_asm_insn ("stws,ma %3,4(0,%0)", operands);
      output_asm_insn ("addib,>= -8,%2,.-12", operands);
      output_asm_insn ("stws,ma %6,4(0,%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
         residual to copy!  */
      if (n_bytes % 8 != 0)
        {
          operands[4] = GEN_INT (n_bytes % 4);
          if (n_bytes % 8 >= 4)
            output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
          if (n_bytes % 4 != 0)
            output_asm_insn ("ldw 0(0,%1),%6", operands);
          if (n_bytes % 8 >= 4)
            output_asm_insn ("stws,ma %3,4(0,%0)", operands);
          if (n_bytes % 4 != 0)
            output_asm_insn ("stbys,e %6,%4(0,%0)", operands);
        }
      return "";

    case 2:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 4);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  */
      output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
      output_asm_insn ("ldhs,ma 2(0,%1),%6", operands);
      output_asm_insn ("sths,ma %3,2(0,%0)", operands);
      output_asm_insn ("addib,>= -4,%2,.-12", operands);
      output_asm_insn ("sths,ma %6,2(0,%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 4 != 0)
        {
          if (n_bytes % 4 >= 2)
            output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
          if (n_bytes % 2 != 0)
            output_asm_insn ("ldb 0(0,%1),%6", operands);
          if (n_bytes % 4 >= 2)
            output_asm_insn ("sths,ma %3,2(0,%0)", operands);
          if (n_bytes % 2 != 0)
            output_asm_insn ("stb %6,0(0,%0)", operands);
        }
      return "";

    case 1:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 2);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop.  */
      output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
      output_asm_insn ("ldbs,ma 1(0,%1),%6", operands);
      output_asm_insn ("stbs,ma %3,1(0,%0)", operands);
      output_asm_insn ("addib,>= -2,%2,.-12", operands);
      output_asm_insn ("stbs,ma %6,1(0,%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 2 != 0)
        {
          output_asm_insn ("ldb 0(0,%1),%3", operands);
          output_asm_insn ("stb %3,0(0,%0)", operands);
        }
      return "";

    default:
      abort ();
    }
}
1904
1905 /* Count the number of insns necessary to handle this block move.
1906
1907 Basic structure is the same as emit_block_move, except that we
1908 count insns rather than emit them. */
1909
1910 int
1911 compute_movstrsi_length (insn)
1912 rtx insn;
1913 {
1914 rtx pat = PATTERN (insn);
1915 int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
1916 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
1917 unsigned int n_insns = 0;
1918
1919 /* We can't move more than four bytes at a time because the PA
1920 has no integer move insns longer than a word. (Could use fp mem ops?) */
1921 if (align > 4)
1922 align = 4;
1923
1924 /* The basic copying loop. */
1925 n_insns = 6;
1926
1927 /* Residuals. */
1928 if (n_bytes % (2 * align) != 0)
1929 {
1930 /* Any residual caused by unrolling the copy loop. */
1931 if (n_bytes % (2 * align) > align)
1932 n_insns += 1;
1933
1934 /* Any residual because the number of bytes was not a
1935 multiple of the alignment. */
1936 if (n_bytes % align != 0)
1937 n_insns += 1;
1938 }
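/* Worked example, following the counting above: for align == 2 and
   n_bytes == 11 the basic loop is 6 insns; the unrolling residual
   (11 % 4 == 3 > 2) adds one and the odd byte (11 % 2 != 0) adds
   another, for 8 insns == 32 bytes. */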
1939
1940 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
1941 return n_insns * 4;
1942 }
1943 \f
1944
1945 char *
1946 output_and (operands)
1947 rtx *operands;
1948 {
1949 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
1950 {
1951 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
1952 int ls0, ls1, ms0, p, len;
1953
1954 for (ls0 = 0; ls0 < 32; ls0++)
1955 if ((mask & (1 << ls0)) == 0)
1956 break;
1957
1958 for (ls1 = ls0; ls1 < 32; ls1++)
1959 if ((mask & (1 << ls1)) != 0)
1960 break;
1961
1962 for (ms0 = ls1; ms0 < 32; ms0++)
1963 if ((mask & (1 << ms0)) == 0)
1964 break;
1965
1966 if (ms0 != 32)
1967 abort();
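/* Worked examples: mask 0x0000007f has ls0 == 7 and ls1 == 32, so we
   extract the low seven bits with "extru %1,31,7,%0". Mask 0xffffff0f
   has ls0 == 4 and ls1 == 8, so we clear the four bit hole with
   "depi 0,27,4,%0" (PA numbers bits from the MSB, so p == 31 - ls0). */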
1968
1969 if (ls1 == 32)
1970 {
1971 len = ls0;
1972
1973 if (len == 0)
1974 abort ();
1975
1976 operands[2] = GEN_INT (len);
1977 return "extru %1,31,%2,%0";
1978 }
1979 else
1980 {
1981 /* We could use this `depi' for the case above as well, but `depi'
1982 requires one more register file access than an `extru'. */
1983
1984 p = 31 - ls0;
1985 len = ls1 - ls0;
1986
1987 operands[2] = GEN_INT (p);
1988 operands[3] = GEN_INT (len);
1989 return "depi 0,%2,%3,%0";
1990 }
1991 }
1992 else
1993 return "and %1,%2,%0";
1994 }
1995
1996 char *
1997 output_ior (operands)
1998 rtx *operands;
1999 {
2000 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2001 int bs0, bs1, p, len;
2002
2003 if (INTVAL (operands[2]) == 0)
2004 return "copy %1,%0";
2005
2006 for (bs0 = 0; bs0 < 32; bs0++)
2007 if ((mask & (1 << bs0)) != 0)
2008 break;
2009
2010 for (bs1 = bs0; bs1 < 32; bs1++)
2011 if ((mask & (1 << bs1)) == 0)
2012 break;
2013
2014 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2015 abort();
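/* For example, mask 0x00000ff0 has bs0 == 4 and bs1 == 12, so we set
   the eight bit field with "depi -1,27,8,%0" (p == 31 - bs0 == 27,
   len == bs1 - bs0 == 8). */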
2016
2017 p = 31 - bs0;
2018 len = bs1 - bs0;
2019
2020 operands[2] = GEN_INT (p);
2021 operands[3] = GEN_INT (len);
2022 return "depi -1,%2,%3,%0";
2023 }
2024 \f
2025 /* Output an ascii string. */
2026 void
2027 output_ascii (file, p, size)
2028 FILE *file;
2029 unsigned char *p;
2030 int size;
2031 {
2032 int i;
2033 int chars_output;
2034 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
2035
2036 /* The HP assembler can only take strings of 256 characters at one
2037 time. This is a limitation on input line length, *not* the
2038 length of the string. Sigh. Even worse, it seems that the
2039 restriction is in number of input characters (see \xnn &
2040 \whatever). So we have to do this very carefully. */
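/* For example, the input bytes 'H', 'i', '\n', '"' come out as
   .STRING "Hi\x0a\""
   since printable characters pass through unchanged, '\n' (0x0a) is
   emitted as a \xnn escape by the nibble-to-hex code below, and '"'
   and '\\' are backslash quoted. */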
2041
2042 fputs ("\t.STRING \"", file);
2043
2044 chars_output = 0;
2045 for (i = 0; i < size; i += 4)
2046 {
2047 int co = 0;
2048 int io = 0;
2049 for (io = 0, co = 0; io < MIN (4, size - i); io++)
2050 {
2051 register unsigned int c = p[i + io];
2052
2053 if (c == '\"' || c == '\\')
2054 partial_output[co++] = '\\';
2055 if (c >= ' ' && c < 0177)
2056 partial_output[co++] = c;
2057 else
2058 {
2059 unsigned int hexd;
2060 partial_output[co++] = '\\';
2061 partial_output[co++] = 'x';
2062 hexd = c / 16 + '0';
2063 if (hexd > '9')
2064 hexd -= '9' - 'a' + 1;
2065 partial_output[co++] = hexd;
2066 hexd = c % 16 + '0';
2067 if (hexd > '9')
2068 hexd -= '9' - 'a' + 1;
2069 partial_output[co++] = hexd;
2070 }
2071 }
2072 if (chars_output + co > 243)
2073 {
2074 fputs ("\"\n\t.STRING \"", file);
2075 chars_output = 0;
2076 }
2077 fwrite (partial_output, 1, co, file);
2078 chars_output += co;
2079 co = 0;
2080 }
2081 fputs ("\"\n", file);
2082 }
2083
2084 /* Try to rewrite floating point comparisons & branches to avoid
2085 useless add,tr insns.
2086
2087 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2088 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2089 first attempt to remove useless add,tr insns. It is zero
2090 for the second pass as reorg sometimes leaves bogus REG_DEAD
2091 notes lying around.
2092
2093 When CHECK_NOTES is zero we can only eliminate add,tr insns
2094 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2095 instructions. */
2096 void
2097 remove_useless_addtr_insns (insns, check_notes)
2098 rtx insns;
2099 int check_notes;
2100 {
2101 rtx insn;
2102 int all;
2103 static int pass = 0;
2104
2105 /* This is fairly cheap, so always run it when optimizing. */
2106 if (optimize > 0)
2107 {
2108 int fcmp_count = 0;
2109 int fbranch_count = 0;
2110
2111 /* Walk all the insns in this function looking for fcmp & fbranch
2112 instructions. Keep track of how many of each we find. */
2113 insns = get_insns ();
2114 for (insn = insns; insn; insn = next_insn (insn))
2115 {
2116 rtx tmp;
2117
2118 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2119 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2120 continue;
2121
2122 tmp = PATTERN (insn);
2123
2124 /* It must be a set. */
2125 if (GET_CODE (tmp) != SET)
2126 continue;
2127
2128 /* If the destination is CCFP, then we've found an fcmp insn. */
2129 tmp = SET_DEST (tmp);
2130 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2131 {
2132 fcmp_count++;
2133 continue;
2134 }
2135
2136 tmp = PATTERN (insn);
2137 /* If this is an fbranch instruction, bump the fbranch counter. */
2138 if (GET_CODE (tmp) == SET
2139 && SET_DEST (tmp) == pc_rtx
2140 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2141 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2142 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2143 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2144 {
2145 fbranch_count++;
2146 continue;
2147 }
2148 }
2149
2150
2151 /* Find all floating point compare + branch insns. If possible,
2152 reverse the comparison & the branch to avoid add,tr insns. */
2153 for (insn = insns; insn; insn = next_insn (insn))
2154 {
2155 rtx tmp, next;
2156
2157 /* Ignore anything that isn't an INSN. */
2158 if (GET_CODE (insn) != INSN)
2159 continue;
2160
2161 tmp = PATTERN (insn);
2162
2163 /* It must be a set. */
2164 if (GET_CODE (tmp) != SET)
2165 continue;
2166
2167 /* The destination must be CCFP, which is register zero. */
2168 tmp = SET_DEST (tmp);
2169 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2170 continue;
2171
2172 /* INSN should be a set of CCFP.
2173
2174 See if the result of this insn is used in a reversed FP
2175 conditional branch. If so, reverse our condition and
2176 the branch. Doing so avoids useless add,tr insns. */
2177 next = next_insn (insn);
2178 while (next)
2179 {
2180 /* Jumps, calls and labels stop our search. */
2181 if (GET_CODE (next) == JUMP_INSN
2182 || GET_CODE (next) == CALL_INSN
2183 || GET_CODE (next) == CODE_LABEL)
2184 break;
2185
2186 /* As does another fcmp insn. */
2187 if (GET_CODE (next) == INSN
2188 && GET_CODE (PATTERN (next)) == SET
2189 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2190 && REGNO (SET_DEST (PATTERN (next))) == 0)
2191 break;
2192
2193 next = next_insn (next);
2194 }
2195
2196 /* Is NEXT a branch? */
2197 if (next
2198 && GET_CODE (next) == JUMP_INSN)
2199 {
2200 rtx pattern = PATTERN (next);
2201
2202 /* If it is a reversed fp conditional branch (e.g. it uses add,tr)
2203 and CCFP dies, then reverse our condition and the branch
2204 to avoid the add,tr. */
2205 if (GET_CODE (pattern) == SET
2206 && SET_DEST (pattern) == pc_rtx
2207 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2208 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2209 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2210 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2211 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2212 && (fcmp_count == fbranch_count
2213 || (check_notes
2214 && find_regno_note (next, REG_DEAD, 0))))
2215 {
2216 /* Reverse the branch. */
2217 tmp = XEXP (SET_SRC (pattern), 1);
2218 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2219 XEXP (SET_SRC (pattern), 2) = tmp;
2220 INSN_CODE (next) = -1;
2221
2222 /* Reverse our condition. */
2223 tmp = PATTERN (insn);
2224 PUT_CODE (XEXP (tmp, 1),
2225 reverse_condition (GET_CODE (XEXP (tmp, 1))));
2226 }
2227 }
2228 }
2229 }
2230
2231 pass = !pass;
2232
2233 }
2234 \f
2235 /* You may have trouble believing this, but this is the HP-PA stack
2236 layout. Wow.
2237
2238 Offset Contents
2239
2240 Variable arguments (optional; any number may be allocated)
2241
2242 SP-(4*(N+9)) arg word N
2243 : :
2244 SP-56 arg word 5
2245 SP-52 arg word 4
2246
2247 Fixed arguments (must be allocated; may remain unused)
2248
2249 SP-48 arg word 3
2250 SP-44 arg word 2
2251 SP-40 arg word 1
2252 SP-36 arg word 0
2253
2254 Frame Marker
2255
2256 SP-32 External Data Pointer (DP)
2257 SP-28 External sr4
2258 SP-24 External/stub RP (RP')
2259 SP-20 Current RP
2260 SP-16 Static Link
2261 SP-12 Clean up
2262 SP-8 Calling Stub RP (RP'')
2263 SP-4 Previous SP
2264
2265 Top of Frame
2266
2267 SP-0 Stack Pointer (points to next available address)
2268
2269 */
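/* For example, the seventh argument word (N == 6) lives at
   SP-(4*(6+9)) == SP-60, just beyond the fixed argument words. */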
2270
2271 /* This function saves registers as follows. Registers marked with ' are
2272 this function's registers (as opposed to the previous function's).
2273 If a frame_pointer isn't needed, r4 is saved as a general register;
2274 the space for the frame pointer is still allocated, though, to keep
2275 things simple.
2276
2277
2278 Top of Frame
2279
2280 SP (FP') Previous FP
2281 SP + 4 Alignment filler (sigh)
2282 SP + 8 Space for locals reserved here.
2283 .
2284 .
2285 .
2286 SP + n All call saved registers used.
2287 .
2288 .
2289 .
2290 SP + o All call saved fp registers used.
2291 .
2292 .
2293 .
2294 SP + p (SP') points to next available address.
2295
2296 */
2297
2298 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2299 Handle case where DISP > 8k by using the add_high_const pattern.
2300
2301 Note in DISP > 8k case, we will leave the high part of the address
2302 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2303 static void
2304 store_reg (reg, disp, base)
2305 int reg, disp, base;
2306 {
2307 if (VAL_14_BITS_P (disp))
2308 {
2309 emit_move_insn (gen_rtx (MEM, SImode,
2310 gen_rtx (PLUS, SImode,
2311 gen_rtx (REG, SImode, base),
2312 GEN_INT (disp))),
2313 gen_rtx (REG, SImode, reg));
2314 }
2315 else
2316 {
2317 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2318 gen_rtx (REG, SImode, base),
2319 GEN_INT (disp)));
2320 emit_move_insn (gen_rtx (MEM, SImode,
2321 gen_rtx (LO_SUM, SImode,
2322 gen_rtx (REG, SImode, 1),
2323 GEN_INT (disp))),
2324 gen_rtx (REG, SImode, reg));
2325 }
2326 }
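/* As a sketch (assuming the usual PA assembler syntax), a store with
   disp == 20480, which does not fit in 14 bits, comes out roughly as

       addil L'20480,%base     ; add_high_const leaves the high part in %r1
       stw %reg,R'20480(%r1)   ; LO_SUM supplies the low part

   while a 14-bit disp is a single "stw %reg,disp(%base)". */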
2327
2328 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
2329 Handle case where DISP > 8k by using the add_high_const pattern.
2330
2331 Note in DISP > 8k case, we will leave the high part of the address
2332 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2333 static void
2334 load_reg (reg, disp, base)
2335 int reg, disp, base;
2336 {
2337 if (VAL_14_BITS_P (disp))
2338 {
2339 emit_move_insn (gen_rtx (REG, SImode, reg),
2340 gen_rtx (MEM, SImode,
2341 gen_rtx (PLUS, SImode,
2342 gen_rtx (REG, SImode, base),
2343 GEN_INT (disp))));
2344 }
2345 else
2346 {
2347 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2348 gen_rtx (REG, SImode, base),
2349 GEN_INT (disp)));
2350 emit_move_insn (gen_rtx (REG, SImode, reg),
2351 gen_rtx (MEM, SImode,
2352 gen_rtx (LO_SUM, SImode,
2353 gen_rtx (REG, SImode, 1),
2354 GEN_INT (disp))));
2355 }
2356 }
2357
2358 /* Emit RTL to set REG to the value specified by BASE+DISP.
2359 Handle case where DISP > 8k by using the add_high_const pattern.
2360
2361 Note in DISP > 8k case, we will leave the high part of the address
2362 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
2363 static void
2364 set_reg_plus_d(reg, base, disp)
2365 int reg, base, disp;
2366 {
2367 if (VAL_14_BITS_P (disp))
2368 {
2369 emit_move_insn (gen_rtx (REG, SImode, reg),
2370 gen_rtx (PLUS, SImode,
2371 gen_rtx (REG, SImode, base),
2372 GEN_INT (disp)));
2373 }
2374 else
2375 {
2376 emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
2377 gen_rtx (REG, SImode, base),
2378 GEN_INT (disp)));
2379 emit_move_insn (gen_rtx (REG, SImode, reg),
2380 gen_rtx (LO_SUM, SImode,
2381 gen_rtx (REG, SImode, 1),
2382 GEN_INT (disp)));
2383 }
2384 }
2385
2386 /* Global variables set by FUNCTION_PROLOGUE. */
2387 /* Size of frame. Need to know this to emit return insns from
2388 leaf procedures. */
2389 static int actual_fsize;
2390 static int local_fsize, save_fregs;
2391
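/* Worked example for compute_frame_size below: a non-leaf function
   with 12 bytes of locals, callee saves through %r5, no FP saves and
   no outgoing args gets fsize = 12 + 8 = 20, plus 4*(5-2) = 12 for
   the GR saves = 32 (already 8 byte aligned), plus 32 for the frame
   marker = 64, which the final 64 byte rounding leaves at 64. */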
2392 int
2393 compute_frame_size (size, fregs_live)
2394 int size;
2395 int *fregs_live;
2396 {
2397 extern int current_function_outgoing_args_size;
2398 int i, fsize;
2399
2400 /* 8 is space for frame pointer + filler. If any frame is allocated
2401 we need to add this in because of STARTING_FRAME_OFFSET. */
2402 fsize = size + (size || frame_pointer_needed ? 8 : 0);
2403
2404 /* We must leave enough space for all the callee saved registers
2405 from 3 .. highest used callee save register since we don't
2406 know if we're going to have an inline or out of line prologue
2407 and epilogue. */
2408 for (i = 18; i >= 3; i--)
2409 if (regs_ever_live[i])
2410 {
2411 fsize += 4 * (i - 2);
2412 break;
2413 }
2414
2415 /* Round the stack. */
2416 fsize = (fsize + 7) & ~7;
2417
2418 /* Likewise, leave enough space for all the callee saved floating
2419 point registers from 48 .. highest used callee save FP register,
2420 again since we don't know if we're going to have an inline or
2421 out of line prologue and epilogue. */
2422 for (i = 66; i >= 48; i -= 2)
2423 if (regs_ever_live[i] || regs_ever_live[i + 1])
2424 {
2425 if (fregs_live)
2426 *fregs_live = 1;
2427
2428 fsize += 4 * (i - 46);
2429 break;
2430 }
2431
2432 fsize += current_function_outgoing_args_size;
2433 if (! leaf_function_p () || fsize)
2434 fsize += 32;
2435 return (fsize + 63) & ~63;
2436 }
2437
2438 rtx hp_profile_label_rtx;
2439 static char hp_profile_label_name[8];
2440 void
2441 output_function_prologue (file, size)
2442 FILE *file;
2443 int size;
2444 {
2445 /* The function's label and associated .PROC must never be
2446 separated and must be output *after* any profiling declarations
2447 to avoid changing spaces/subspaces within a procedure. */
2448 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
2449 fputs ("\t.PROC\n", file);
2450
2451 /* hppa_expand_prologue does the dirty work now. We just need
2452 to output the assembler directives which denote the start
2453 of a function. */
2454 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
2455 if (regs_ever_live[2] || profile_flag)
2456 fputs (",CALLS,SAVE_RP", file);
2457 else
2458 fputs (",NO_CALLS", file);
2459
2460 if (frame_pointer_needed)
2461 fputs (",SAVE_SP", file);
2462
2463 /* Pass on information about the number of callee register saves
2464 performed in the prologue.
2465
2466 The compiler is supposed to pass the highest register number
2467 saved, the assembler then has to adjust that number before
2468 entering it into the unwind descriptor (to account for any
2469 caller saved registers with lower register numbers than the
2470 first callee saved register). */
2471 if (gr_saved)
2472 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
2473
2474 if (fr_saved)
2475 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
2476
2477 fputs ("\n\t.ENTRY\n", file);
2478
2479 /* Horrid hack. emit_function_prologue will modify this RTL in
2480 place to get the expected results. */
2481 if (profile_flag)
2482 ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
2483 hp_profile_labelno);
2484
2485 /* If we're using GAS and not using the portable runtime model, then
2486 we don't need to accumulate the total number of code bytes. */
2487 if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
2488 total_code_bytes = 0;
2489 else if (insn_addresses)
2490 {
2491 unsigned int old_total = total_code_bytes;
2492
2493 total_code_bytes += insn_addresses[INSN_UID (get_last_insn())];
2494 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
2495
2496 /* Be prepared to handle overflows. */
2497 total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
2498 }
2499 else
2500 total_code_bytes = -1;
2501
2502 remove_useless_addtr_insns (get_insns (), 0);
2503 }
2504
2505 void
2506 hppa_expand_prologue()
2507 {
2508 extern char call_used_regs[];
2509 int size = get_frame_size ();
2510 int merge_sp_adjust_with_store = 0;
2511 int i, offset;
2512 rtx tmpreg, size_rtx;
2513
2514 gr_saved = 0;
2515 fr_saved = 0;
2516 save_fregs = 0;
2517 local_fsize = size + (size || frame_pointer_needed ? 8 : 0);
2518 actual_fsize = compute_frame_size (size, &save_fregs);
2519
2520 /* Compute a few things we will use often. */
2521 tmpreg = gen_rtx (REG, SImode, 1);
2522 size_rtx = GEN_INT (actual_fsize);
2523
2524 /* Handle out of line prologues and epilogues. */
2525 if (TARGET_SPACE)
2526 {
2527 rtx operands[2];
2528 int saves = 0;
2529 int outline_insn_count = 0;
2530 int inline_insn_count = 0;
2531
2532 /* Count the number of insns for the inline and out of line
2533 variants so we can choose one appropriately.
2534
2535 No need to screw with counting actual_fsize operations -- they're
2536 done for both inline and out of line prologues. */
2537 if (regs_ever_live[2])
2538 inline_insn_count += 1;
2539
2540 if (! cint_ok_for_move (local_fsize))
2541 outline_insn_count += 2;
2542 else
2543 outline_insn_count += 1;
2544
2545 /* Put the register save info into %r22. */
2546 for (i = 18; i >= 3; i--)
2547 if (regs_ever_live[i] && ! call_used_regs[i])
2548 {
2549 /* -1 because the stack adjustment is normally done in
2550 the same insn as a register save. */
2551 inline_insn_count += (i - 2) - 1;
2552 saves = i;
2553 break;
2554 }
2555
2556 for (i = 66; i >= 48; i -= 2)
2557 if (regs_ever_live[i] || regs_ever_live[i + 1])
2558 {
2559 /* +1 needed as we load %r1 with the start of the freg
2560 save area. */
2561 inline_insn_count += (i/2 - 23) + 1;
2562 saves |= ((i/2 - 12 ) << 16);
2563 break;
2564 }
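/* For example, with GR saves through %r10 and FP saves through
   register 58, saves ends up as 10 | ((58/2 - 12) << 16): the GR
   count in the low half, the FR info in the high half, for the
   out-of-line routine to decode. */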
2565
2566 if (frame_pointer_needed)
2567 inline_insn_count += 3;
2568
2569 if (! cint_ok_for_move (saves))
2570 outline_insn_count += 2;
2571 else
2572 outline_insn_count += 1;
2573
2574 if (TARGET_PORTABLE_RUNTIME)
2575 outline_insn_count += 2;
2576 else
2577 outline_insn_count += 1;
2578
2579 /* If there's a lot of insns in the prologue, then do it as
2580 an out-of-line sequence. */
2581 if (inline_insn_count > outline_insn_count)
2582 {
2583 /* Put the local_fsize into %r19. */
2584 operands[0] = gen_rtx (REG, SImode, 19);
2585 operands[1] = GEN_INT (local_fsize);
2586 emit_move_insn (operands[0], operands[1]);
2587
2588 /* Put the stack size into %r21. */
2589 operands[0] = gen_rtx (REG, SImode, 21);
2590 operands[1] = size_rtx;
2591 emit_move_insn (operands[0], operands[1]);
2592
2593 operands[0] = gen_rtx (REG, SImode, 22);
2594 operands[1] = GEN_INT (saves);
2595 emit_move_insn (operands[0], operands[1]);
2596
2597 /* Now call the out-of-line prologue. */
2598 emit_insn (gen_outline_prologue_call ());
2599 emit_insn (gen_blockage ());
2600
2601 /* Note that we're using an out-of-line prologue. */
2602 out_of_line_prologue_epilogue = 1;
2603 return;
2604 }
2605 }
2606
2607 out_of_line_prologue_epilogue = 0;
2608
2609 /* Save RP first. The calling conventions manual states RP will
2610 always be stored into the caller's frame at sp-20. */
2611 if (regs_ever_live[2] || profile_flag)
2612 store_reg (2, -20, STACK_POINTER_REGNUM);
2613
2614 /* Allocate the local frame and set up the frame pointer if needed. */
2615 if (actual_fsize)
2616 if (frame_pointer_needed)
2617 {
2618 /* Copy the old frame pointer temporarily into %r1. Set up the
2619 new stack pointer, then store away the saved old frame pointer
2620 into the stack at sp+actual_fsize and at the same time update
2621 the stack pointer by actual_fsize bytes. Two versions, first
2622 handles small (<8k) frames. The second handles large (>8k)
2623 frames. */
2624 emit_move_insn (tmpreg, frame_pointer_rtx);
2625 emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
2626 if (VAL_14_BITS_P (actual_fsize))
2627 emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx));
2628 else
2629 {
2630 /* It is incorrect to store the saved frame pointer at *sp,
2631 then increment sp (writes beyond the current stack boundary).
2632
2633 So instead use stwm to store at *sp and post-increment the
2634 stack pointer as an atomic operation. Then increment sp to
2635 finish allocating the new frame. */
2636 emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64)));
2637 set_reg_plus_d (STACK_POINTER_REGNUM,
2638 STACK_POINTER_REGNUM,
2639 actual_fsize - 64);
2640 }
2641 }
2642 /* no frame pointer needed. */
2643 else
2644 {
2645 /* In some cases we can perform the first callee register save
2646 and allocating the stack frame at the same time. If so, just
2647 make a note of it and defer allocating the frame until saving
2648 the callee registers. */
2649 if (VAL_14_BITS_P (-actual_fsize)
2650 && local_fsize == 0
2651 && ! profile_flag
2652 && ! flag_pic)
2653 merge_sp_adjust_with_store = 1;
2654 /* Can not optimize. Adjust the stack frame by actual_fsize bytes. */
2655 else if (actual_fsize != 0)
2656 set_reg_plus_d (STACK_POINTER_REGNUM,
2657 STACK_POINTER_REGNUM,
2658 actual_fsize);
2659 }
2660 /* The hppa calling conventions say that %r19, the pic offset
2661 register, is saved at sp - 32 (in this function's frame) when
2662 generating PIC code. FIXME: What is the correct thing to do
2663 for functions which make no calls and allocate no frame? Do
2664 we need to allocate a frame, or can we just omit the save? For
2665 now we'll just omit the save. */
2666 if (actual_fsize != 0 && flag_pic)
2667 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
2668
2669 /* Profiling code.
2670
2671 Instead of taking one argument, the counter label, as most normal
2672 mcounts do, _mcount appears to behave differently on the HPPA. It
2673 takes the return address of the caller, the address of this routine,
2674 and the address of the label. Also, it isn't magic, so
2675 argument registers have to be preserved. */
2676 if (profile_flag)
2677 {
2678 int pc_offset, i, arg_offset, basereg, offsetadj;
2679
2680 pc_offset = 4 + (frame_pointer_needed
2681 ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20)
2682 : (VAL_14_BITS_P (actual_fsize) ? 4 : 8));
2683
2684 /* When the function has a frame pointer, use it as the base
2685 register for saving/restoring registers. Else use the stack
2686 pointer. Adjust the offset according to the frame size if
2687 this function does not have a frame pointer. */
2688
2689 basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
2690 : STACK_POINTER_REGNUM;
2691 offsetadj = frame_pointer_needed ? 0 : actual_fsize;
2692
2693 /* Horrid hack. emit_function_prologue will modify this RTL in
2694 place to get the expected results. sprintf here is just to
2695 put something in the name. */
2696 sprintf(hp_profile_label_name, "LP$%04d", -1);
2697 hp_profile_label_rtx = gen_rtx (SYMBOL_REF, SImode,
2698 hp_profile_label_name);
2699 if (current_function_returns_struct)
2700 store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg);
2701
2702 for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2703 if (regs_ever_live [i])
2704 {
2705 store_reg (i, arg_offset, basereg);
2706 /* Deal with arg_offset not fitting in 14 bits. */
2707 pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8;
2708 }
2709
2710 emit_move_insn (gen_rtx (REG, SImode, 26), gen_rtx (REG, SImode, 2));
2711 emit_move_insn (tmpreg, gen_rtx (HIGH, SImode, hp_profile_label_rtx));
2712 emit_move_insn (gen_rtx (REG, SImode, 24),
2713 gen_rtx (LO_SUM, SImode, tmpreg, hp_profile_label_rtx));
2714 /* %r25 is set from within the output pattern. */
2715 emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20)));
2716
2717 /* Restore argument registers. */
2718 for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2719 if (regs_ever_live [i])
2720 load_reg (i, arg_offset, basereg);
2721
2722 if (current_function_returns_struct)
2723 load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg);
2724
2725 }
2726
2727 /* Normal register save.
2728
2729 Do not save the frame pointer in the frame_pointer_needed case. It
2730 was done earlier. */
2731 if (frame_pointer_needed)
2732 {
2733 for (i = 18, offset = local_fsize; i >= 4; i--)
2734 if (regs_ever_live[i] && ! call_used_regs[i])
2735 {
2736 store_reg (i, offset, FRAME_POINTER_REGNUM);
2737 offset += 4;
2738 gr_saved++;
2739 }
2740 /* Account for %r3 which is saved in a special place. */
2741 gr_saved++;
2742 }
2743 /* No frame pointer needed. */
2744 else
2745 {
2746 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
2747 if (regs_ever_live[i] && ! call_used_regs[i])
2748 {
2749 /* If merge_sp_adjust_with_store is nonzero, then we can
2750 optimize the first GR save. */
2751 if (merge_sp_adjust_with_store)
2752 {
2753 merge_sp_adjust_with_store = 0;
2754 emit_insn (gen_post_stwm (stack_pointer_rtx,
2755 gen_rtx (REG, SImode, i),
2756 GEN_INT (-offset)));
2757 }
2758 else
2759 store_reg (i, offset, STACK_POINTER_REGNUM);
2760 offset += 4;
2761 gr_saved++;
2762 }
2763
2764 /* If we wanted to merge the SP adjustment with a GR save, but we never
2765 did any GR saves, then just emit the adjustment here. */
2766 if (merge_sp_adjust_with_store)
2767 set_reg_plus_d (STACK_POINTER_REGNUM,
2768 STACK_POINTER_REGNUM,
2769 actual_fsize);
2770 }
2771
2772 /* Align pointer properly (doubleword boundary). */
2773 offset = (offset + 7) & ~7;
2774
2775 /* Floating point register store. */
2776 if (save_fregs)
2777 {
2778 /* First get the frame or stack pointer to the start of the FP register
2779 save area. */
2780 if (frame_pointer_needed)
2781 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
2782 else
2783 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
2784
2785 /* Now actually save the FP registers. */
2786 for (i = 66; i >= 48; i -= 2)
2787 {
2788 if (regs_ever_live[i] || regs_ever_live[i + 1])
2789 {
2790 emit_move_insn (gen_rtx (MEM, DFmode,
2791 gen_rtx (POST_INC, DFmode, tmpreg)),
2792 gen_rtx (REG, DFmode, i));
2793 fr_saved++;
2794 }
2795 }
2796 }
2797
2798 /* When generating PIC code it is necessary to save/restore the
2799 PIC register around each function call. We used to do this
2800 in the call patterns themselves, but that implementation
2801 made incorrect assumptions about using global variables to hold
2802 per-function rtl code generated in the backend.
2803
2804 So instead, we copy the PIC register into a reserved callee saved
2805 register in the prologue. Then after each call we reload the PIC
2806 register from the callee saved register. We also reload the PIC
2807 register from the callee saved register in the epilogue to ensure the
2808 PIC register is valid at function exit.
2809
2810 This may (depending on the exact characteristics of the function)
2811 even be more efficient.
2812
2813 Avoid this if the callee saved register wasn't used (these are
2814 leaf functions). */
2815 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED])
2816 emit_move_insn (gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM_SAVED),
2817 gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM));
2818 }
2819
2820
2821 void
2822 output_function_epilogue (file, size)
2823 FILE *file;
2824 int size;
2825 {
2826 rtx insn = get_last_insn ();
2827 int i;
2828
2829 /* hppa_expand_epilogue does the dirty work now. We just need
2830 to output the assembler directives which denote the end
2831 of a function.
2832
2833 To make debuggers happy, emit a nop if the epilogue was completely
2834 eliminated due to a volatile call as the last insn in the
2835 current function. That way the return address (in %r2) will
2836 always point to a valid instruction in the current function. */
2837
2838 /* Get the last real insn. */
2839 if (GET_CODE (insn) == NOTE)
2840 insn = prev_real_insn (insn);
2841
2842 /* If it is a sequence, then look inside. */
2843 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
2844 insn = XVECEXP (PATTERN (insn), 0, 0);
2845
2846 /* If insn is a CALL_INSN, then it must be a call to a volatile
2847 function (otherwise there would be epilogue insns). */
2848 if (insn && GET_CODE (insn) == CALL_INSN)
2849 fputs ("\tnop\n", file);
2850
2851 fputs ("\t.EXIT\n\t.PROCEND\n", file);
2852
2853 /* If we have deferred plabels, then we need to switch into the data
2854 section and align it to a 4 byte boundary before we output the
2855 deferred plabels. */
2856 if (n_deferred_plabels)
2857 {
2858 data_section ();
2859 ASM_OUTPUT_ALIGN (file, 2);
2860 }
2861
2862 /* Now output the deferred plabels. */
2863 for (i = 0; i < n_deferred_plabels; i++)
2864 {
2865 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
2866 assemble_integer (deferred_plabels[i].symbol, 4, 1);
2867 }
2868 n_deferred_plabels = 0;
2869 }
2870
2871 void
2872 hppa_expand_epilogue ()
2873 {
2874 rtx tmpreg;
2875 int offset,i;
2876 int merge_sp_adjust_with_load = 0;
2877
2878 /* Handle out of line prologues and epilogues. */
2879 if (TARGET_SPACE && out_of_line_prologue_epilogue)
2880 {
2881 int saves = 0;
2882 rtx operands[2];
2883
2884 /* Put the register save info into %r22. */
2885 for (i = 18; i >= 3; i--)
2886 if (regs_ever_live[i] && ! call_used_regs[i])
2887 {
2888 saves = i;
2889 break;
2890 }
2891
2892 for (i = 66; i >= 48; i -= 2)
2893 if (regs_ever_live[i] || regs_ever_live[i + 1])
2894 {
2895 saves |= ((i/2 - 12 ) << 16);
2896 break;
2897 }
2898
2899 emit_insn (gen_blockage ());
2900
2901 /* Put the local_fsize into %r19. */
2902 operands[0] = gen_rtx (REG, SImode, 19);
2903 operands[1] = GEN_INT (local_fsize);
2904 emit_move_insn (operands[0], operands[1]);
2905
2906 /* Put the stack size into %r21. */
2907 operands[0] = gen_rtx (REG, SImode, 21);
2908 operands[1] = GEN_INT (actual_fsize);
2909 emit_move_insn (operands[0], operands[1]);
2910
2911 operands[0] = gen_rtx (REG, SImode, 22);
2912 operands[1] = GEN_INT (saves);
2913 emit_move_insn (operands[0], operands[1]);
2914
2915 /* Now call the out-of-line epilogue. */
2916 emit_insn (gen_outline_epilogue_call ());
2917 return;
2918 }
2919
2920 /* We will use this often. */
2921 tmpreg = gen_rtx (REG, SImode, 1);
2922
2923 /* Try to restore RP early to avoid load/use interlocks when
2924 RP gets used in the return (bv) instruction. This appears to still
2925 be necessary even when we schedule the prologue and epilogue. */
2926 if (frame_pointer_needed
2927 && (regs_ever_live [2] || profile_flag))
2928 load_reg (2, -20, FRAME_POINTER_REGNUM);
2929
2930 /* No frame pointer, and stack is smaller than 8k. */
2931 else if (! frame_pointer_needed
2932 && VAL_14_BITS_P (actual_fsize + 20)
2933 && (regs_ever_live[2] || profile_flag))
2934 load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM);
2935
2936 /* General register restores. */
2937 if (frame_pointer_needed)
2938 {
2939 for (i = 18, offset = local_fsize; i >= 4; i--)
2940 if (regs_ever_live[i] && ! call_used_regs[i])
2941 {
2942 load_reg (i, offset, FRAME_POINTER_REGNUM);
2943 offset += 4;
2944 }
2945 }
2946 else
2947 {
2948 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
2949 {
2950 if (regs_ever_live[i] && ! call_used_regs[i])
2951 {
2952 /* Only for the first load.
2953 merge_sp_adjust_with_load holds the register load
2954 with which we will merge the sp adjustment. */
2955 if (VAL_14_BITS_P (actual_fsize + 20)
2956 && local_fsize == 0
2957 && ! merge_sp_adjust_with_load)
2958 merge_sp_adjust_with_load = i;
2959 else
2960 load_reg (i, offset, STACK_POINTER_REGNUM);
2961 offset += 4;
2962 }
2963 }
2964 }
2965
2966 /* Align pointer properly (doubleword boundary). */
2967 offset = (offset + 7) & ~7;
2968
2969 /* FP register restores. */
2970 if (save_fregs)
2971 {
2972 /* Adjust the register to index off of. */
2973 if (frame_pointer_needed)
2974 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
2975 else
2976 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
2977
2978 /* Actually do the restores now. */
2979 for (i = 66; i >= 48; i -= 2)
2980 {
2981 if (regs_ever_live[i] || regs_ever_live[i + 1])
2982 {
2983 emit_move_insn (gen_rtx (REG, DFmode, i),
2984 gen_rtx (MEM, DFmode,
2985 gen_rtx (POST_INC, DFmode, tmpreg)));
2986 }
2987 }
2988 }
2989
2990 /* Emit a blockage insn here to keep these insns from being moved to
2991 an earlier spot in the epilogue, or into the main instruction stream.
2992
2993 This is necessary as we must not cut the stack back before all the
2994 restores are finished. */
2995 emit_insn (gen_blockage ());
2996 /* No frame pointer, but we have a stack greater than 8k. We restore
2997 %r2 very late in this case. (All other cases are restored as early
2998 as possible.) */
2999 if (! frame_pointer_needed
3000 && ! VAL_14_BITS_P (actual_fsize + 20)
3001 && (regs_ever_live[2] || profile_flag))
3002 {
3003 set_reg_plus_d (STACK_POINTER_REGNUM,
3004 STACK_POINTER_REGNUM,
3005 - actual_fsize);
3006
3007 /* This used to try and be clever by not depending on the value in
3008 %r30 and instead use the value held in %r1 (so that the 2nd insn
3009 which sets %r30 could be put in the delay slot of the return insn).
3010
3011 That won't work since if the stack is exactly 8k set_reg_plus_d
3012 doesn't set %r1, just %r30. */
3013 load_reg (2, - 20, STACK_POINTER_REGNUM);
3014 }
3015
3016 /* Reset stack pointer (and possibly frame pointer). The stack
3017 pointer is initially set to fp + 64 to avoid a race condition. */
3018 else if (frame_pointer_needed)
3019 {
3020 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
3021 emit_insn (gen_pre_ldwm (frame_pointer_rtx,
3022 stack_pointer_rtx,
3023 GEN_INT (-64)));
3024 }
3025 /* If we were deferring a callee register restore, do it now. */
3026 else if (! frame_pointer_needed && merge_sp_adjust_with_load)
3027 emit_insn (gen_pre_ldwm (gen_rtx (REG, SImode,
3028 merge_sp_adjust_with_load),
3029 stack_pointer_rtx,
3030 GEN_INT (- actual_fsize)));
3031 else if (actual_fsize != 0)
3032 set_reg_plus_d (STACK_POINTER_REGNUM,
3033 STACK_POINTER_REGNUM,
3034 - actual_fsize);
3035 }
3036
3037 /* Fetch the return address for the frame COUNT steps up from
3038 the current frame, after the prologue. FRAMEADDR is the
3039 frame pointer of the COUNT frame.
3040
3041 We want to ignore any export stub remnants here. */
3042
3043 rtx
3044 return_addr_rtx (count, frameaddr)
3045 int count;
3046 rtx frameaddr;
3047 {
3048 rtx label;
3049 rtx saved_rp;
3050 rtx ins;
3051
3052 saved_rp = gen_reg_rtx (Pmode);
3053
3054 /* First, we start off with the normal return address pointer from
3055 -20[frameaddr]. */
3056
3057 emit_move_insn (saved_rp, plus_constant (frameaddr, -5 * UNITS_PER_WORD));
3058
3059 /* Get pointer to the instruction stream. We have to mask out the
3060 privilege level from the two low order bits of the return address
3061 pointer here so that ins will point to the start of the first
3062 instruction that would have been executed if we returned. */
3063 ins = copy_to_reg (gen_rtx (AND, Pmode,
3064 copy_to_reg (gen_rtx (MEM, Pmode, saved_rp)),
3065 MASK_RETURN_ADDR));
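/* For example (assuming MASK_RETURN_ADDR clears the two privilege
   bits), a saved rp of 0x00012347 -- user code runs at privilege
   level 3 -- gives ins == 0x00012344, the first instruction we
   would have returned to. */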
3066 label = gen_label_rtx ();
3067
3068 /* Check the instruction stream at the normal return address for the
3069 export stub:
3070
3071 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3072 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3073 0x00011820 | stub+16: mtsp r1,sr0
3074 0xe0400002 | stub+20: be,n 0(sr0,rp)
3075
3076 If it is an export stub, then our return address is really in
3077 -24[frameaddr]. */
3078
3079 emit_cmp_insn (gen_rtx (MEM, SImode, ins),
3080 GEN_INT (0x4bc23fd1),
3081 NE, NULL_RTX, SImode, 1, 0);
3082 emit_jump_insn (gen_bne (label));
3083
3084 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 4)),
3085 GEN_INT (0x004010a1),
3086 NE, NULL_RTX, SImode, 1, 0);
3087 emit_jump_insn (gen_bne (label));
3088
3089 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 8)),
3090 GEN_INT (0x00011820),
3091 NE, NULL_RTX, SImode, 1, 0);
3092 emit_jump_insn (gen_bne (label));
3093
3094 emit_cmp_insn (gen_rtx (MEM, SImode, plus_constant (ins, 12)),
3095 GEN_INT (0xe0400002),
3096 NE, NULL_RTX, SImode, 1, 0);
3097
3098 /* If there is no export stub then just use our initial guess of
3099 -20[frameaddr]. */
3100
3101 emit_jump_insn (gen_bne (label));
3102
3103 /* Here we know that our return address pointer points to an export
3104 stub. We don't want to return the address of the export stub,
3105 but rather the return address that leads back into user code.
3106 That return address is stored at -24[frameaddr]. */
3107
3108 emit_move_insn (saved_rp, plus_constant (frameaddr, -6 * UNITS_PER_WORD));
3109
3110 emit_label (label);
3111 return gen_rtx (MEM, Pmode, memory_address (Pmode, saved_rp));
3112 }
3113
3114 /* This is only valid once reload has completed because it depends on
3115 knowing exactly how much (if any) frame there is and...
3116
3117 It's only valid if there is no frame marker to de-allocate and...
3118
3119 It's only valid if %r2 hasn't been saved into the caller's frame
3120 (we're not profiling and %r2 isn't live anywhere). */
3121 int
3122 hppa_can_use_return_insn_p ()
3123 {
3124 return (reload_completed
3125 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3126 && ! profile_flag
3127 && ! regs_ever_live[2]
3128 && ! frame_pointer_needed);
3129 }
3130
3131 void
3132 emit_bcond_fp (code, operand0)
3133 enum rtx_code code;
3134 rtx operand0;
3135 {
3136 emit_jump_insn (gen_rtx (SET, VOIDmode, pc_rtx,
3137 gen_rtx (IF_THEN_ELSE, VOIDmode,
3138 gen_rtx (code, VOIDmode,
3139 gen_rtx (REG, CCFPmode, 0),
3140 const0_rtx),
3141 gen_rtx (LABEL_REF, VOIDmode, operand0),
3142 pc_rtx)));
3143
3144 }
3145
3146 rtx
3147 gen_cmp_fp (code, operand0, operand1)
3148 enum rtx_code code;
3149 rtx operand0, operand1;
3150 {
3151 return gen_rtx (SET, VOIDmode, gen_rtx (REG, CCFPmode, 0),
3152 gen_rtx (code, CCFPmode, operand0, operand1));
3153 }
3154
3155 /* Adjust the cost of a scheduling dependency. Return the new cost of
3156 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
3157
3158 int
3159 pa_adjust_cost (insn, link, dep_insn, cost)
3160 rtx insn;
3161 rtx link;
3162 rtx dep_insn;
3163 int cost;
3164 {
3165 if (! recog_memoized (insn))
3166 return 0;
3167
3168 if (REG_NOTE_KIND (link) == 0)
3169 {
3170 /* Data dependency; DEP_INSN writes a register that INSN reads some
3171 cycles later. */
3172
3173 if (get_attr_type (insn) == TYPE_FPSTORE)
3174 {
3175 rtx pat = PATTERN (insn);
3176 rtx dep_pat = PATTERN (dep_insn);
3177 if (GET_CODE (pat) == PARALLEL)
3178 {
3179 /* This happens for the fstXs,mb patterns. */
3180 pat = XVECEXP (pat, 0, 0);
3181 }
3182 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3183 /* If this happens, we have to extend this to schedule
3184 optimally. Return 0 for now. */
3185 return 0;
3186
3187 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
3188 {
3189 if (! recog_memoized (dep_insn))
3190 return 0;
3191 /* DEP_INSN is writing its result to the register
3192 being stored in the fpstore INSN. */
3193 switch (get_attr_type (dep_insn))
3194 {
3195 case TYPE_FPLOAD:
3196 /* This costs 3 cycles, not 2 as the md says for the
3197 700 and 7100. Note scaling of cost for 7100. */
3198 return cost + ((pa_cpu == PROCESSOR_700) ? 1 : 2);
3199
3200 case TYPE_FPALU:
3201 case TYPE_FPMULSGL:
3202 case TYPE_FPMULDBL:
3203 case TYPE_FPDIVSGL:
3204 case TYPE_FPDIVDBL:
3205 case TYPE_FPSQRTSGL:
3206 case TYPE_FPSQRTDBL:
3207 /* In these important cases, we save one cycle compared to
3208 when flop instructions feed each other. */
3209 return cost - ((pa_cpu == PROCESSOR_700) ? 1 : 2);
3210
3211 default:
3212 return cost;
3213 }
3214 }
3215 }
3216
3217 /* For other data dependencies, the default cost specified in the
3218 md is correct. */
3219 return cost;
3220 }
3221 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3222 {
3223 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3224 cycles later. */
3225
3226 if (get_attr_type (insn) == TYPE_FPLOAD)
3227 {
3228 rtx pat = PATTERN (insn);
3229 rtx dep_pat = PATTERN (dep_insn);
3230 if (GET_CODE (pat) == PARALLEL)
3231 {
3232 /* This happens for the fldXs,mb patterns. */
3233 pat = XVECEXP (pat, 0, 0);
3234 }
3235 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3236 /* If this happens, we have to extend this to schedule
3237 optimally. Return 0 for now. */
3238 return 0;
3239
3240 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3241 {
3242 if (! recog_memoized (dep_insn))
3243 return 0;
3244 switch (get_attr_type (dep_insn))
3245 {
3246 case TYPE_FPALU:
3247 case TYPE_FPMULSGL:
3248 case TYPE_FPMULDBL:
3249 case TYPE_FPDIVSGL:
3250 case TYPE_FPDIVDBL:
3251 case TYPE_FPSQRTSGL:
3252 case TYPE_FPSQRTDBL:
3253 /* A fpload can't be issued until one cycle before a
3254 preceding arithmetic operation has finished if
3255 the target of the fpload is any of the sources
3256 (or destination) of the arithmetic operation. */
3257 return cost - ((pa_cpu == PROCESSOR_700) ? 1 : 2);
3258
3259 default:
3260 return 0;
3261 }
3262 }
3263 }
3264 else if (get_attr_type (insn) == TYPE_FPALU)
3265 {
3266 rtx pat = PATTERN (insn);
3267 rtx dep_pat = PATTERN (dep_insn);
3268 if (GET_CODE (pat) == PARALLEL)
3269 {
3270 /* This happens for the fldXs,mb patterns. */
3271 pat = XVECEXP (pat, 0, 0);
3272 }
3273 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3274 /* If this happens, we have to extend this to schedule
3275 optimally. Return 0 for now. */
3276 return 0;
3277
3278 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3279 {
3280 if (! recog_memoized (dep_insn))
3281 return 0;
3282 switch (get_attr_type (dep_insn))
3283 {
3284 case TYPE_FPDIVSGL:
3285 case TYPE_FPDIVDBL:
3286 case TYPE_FPSQRTSGL:
3287 case TYPE_FPSQRTDBL:
3288 /* An ALU flop can't be issued until two cycles before a
3289 preceding divide or sqrt operation has finished if
3290 the target of the ALU flop is any of the sources
3291 (or destination) of the divide or sqrt operation. */
3292 return cost - ((pa_cpu == PROCESSOR_700) ? 2 : 4);
3293
3294 default:
3295 return 0;
3296 }
3297 }
3298 }
3299
3300 /* For other anti dependencies, the cost is 0. */
3301 return 0;
3302 }
3303 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
3304 {
3305 /* Output dependency; DEP_INSN writes a register that INSN writes some
3306 cycles later. */
3307 if (get_attr_type (insn) == TYPE_FPLOAD)
3308 {
3309 rtx pat = PATTERN (insn);
3310 rtx dep_pat = PATTERN (dep_insn);
3311 if (GET_CODE (pat) == PARALLEL)
3312 {
3313 /* This happens for the fldXs,mb patterns. */
3314 pat = XVECEXP (pat, 0, 0);
3315 }
3316 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3317 /* If this happens, we have to extend this to schedule
3318 optimally. Return 0 for now. */
3319 return 0;
3320
3321 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3322 {
3323 if (! recog_memoized (dep_insn))
3324 return 0;
3325 switch (get_attr_type (dep_insn))
3326 {
3327 case TYPE_FPALU:
3328 case TYPE_FPMULSGL:
3329 case TYPE_FPMULDBL:
3330 case TYPE_FPDIVSGL:
3331 case TYPE_FPDIVDBL:
3332 case TYPE_FPSQRTSGL:
3333 case TYPE_FPSQRTDBL:
3334 /* A fpload can't be issued until one cycle before a
3335 preceding arithmetic operation has finished if
3336 the target of the fpload is the destination of the
3337 arithmetic operation. */
3338 return cost - ((pa_cpu == PROCESSOR_700) ? 1 : 2);
3339
3340 default:
3341 return 0;
3342 }
3343 }
3344 }
3345 else if (get_attr_type (insn) == TYPE_FPALU)
3346 {
3347 rtx pat = PATTERN (insn);
3348 rtx dep_pat = PATTERN (dep_insn);
3349 if (GET_CODE (pat) == PARALLEL)
3350 {
3351 /* This happens for the fldXs,mb patterns. */
3352 pat = XVECEXP (pat, 0, 0);
3353 }
3354 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3355 /* If this happens, we have to extend this to schedule
3356 optimally. Return 0 for now. */
3357 return 0;
3358
3359 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3360 {
3361 if (! recog_memoized (dep_insn))
3362 return 0;
3363 switch (get_attr_type (dep_insn))
3364 {
3365 case TYPE_FPDIVSGL:
3366 case TYPE_FPDIVDBL:
3367 case TYPE_FPSQRTSGL:
3368 case TYPE_FPSQRTDBL:
3369 /* An ALU flop can't be issued until two cycles before a
3370 preceding divide or sqrt operation has finished if
3371 the target of the ALU flop is also the target of
3372 the divide or sqrt operation. */
3373 return cost - ((pa_cpu == PROCESSOR_700) ? 2 : 4);
3374
3375 default:
3376 return 0;
3377 }
3378 }
3379 }
3380
3381 /* For other output dependencies, the cost is 0. */
3382 return 0;
3383 }
3384 else
3385 abort ();
3386 }
3387
3388 /* Return any length adjustment needed by INSN which already has its length
3389 computed as LENGTH. Return zero if no adjustment is necessary.
3390
3391 For the PA: function calls, millicode calls, and backwards short
3392 conditional branches with unfilled delay slots need an adjustment by +1
3393 (to account for the NOP which will be inserted into the instruction stream).
3394
3395 Also compute the length of an inline block move here as it is too
3396 complicated to express as a length attribute in pa.md. */
3397 int
3398 pa_adjust_insn_length (insn, length)
3399 rtx insn;
3400 int length;
3401 {
3402 rtx pat = PATTERN (insn);
3403
3404 /* Call insns which are *not* indirect and have unfilled delay slots. */
3405 if (GET_CODE (insn) == CALL_INSN)
3406 {
3407
3408 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
3409 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
3410 return 4;
3411 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
3412 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
3413 == SYMBOL_REF)
3414 return 4;
3415 else
3416 return 0;
3417 }
3418 /* Jumps inside switch tables which have unfilled delay slots
3419 also need adjustment. */
3420 else if (GET_CODE (insn) == JUMP_INSN
3421 && simplejump_p (insn)
3422 && GET_MODE (PATTERN (insn)) == DImode)
3423 return 4;
3424 /* Millicode insn with an unfilled delay slot. */
3425 else if (GET_CODE (insn) == INSN
3426 && GET_CODE (pat) != SEQUENCE
3427 && GET_CODE (pat) != USE
3428 && GET_CODE (pat) != CLOBBER
3429 && get_attr_type (insn) == TYPE_MILLI)
3430 return 4;
3431 /* Block move pattern. */
3432 else if (GET_CODE (insn) == INSN
3433 && GET_CODE (pat) == PARALLEL
3434 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
3435 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
3436 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
3437 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
3438 return compute_movstrsi_length (insn) - 4;
3439 /* Conditional branch with an unfilled delay slot. */
3440 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
3441 {
3442 /* Adjust a short backwards conditional with an unfilled delay slot. */
3443 if (GET_CODE (pat) == SET
3444 && length == 4
3445 && ! forward_branch_p (insn))
3446 return 4;
3447 else if (GET_CODE (pat) == PARALLEL
3448 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
3449 && length == 4)
3450 return 4;
3451 /* Adjust dbra insn with short backwards conditional branch with
3452 unfilled delay slot -- only for case where counter is in a
3453 general register. */
3454 else if (GET_CODE (pat) == PARALLEL
3455 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
3456 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
3457 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
3458 && length == 4
3459 && ! forward_branch_p (insn))
3460 return 4;
3461 else
3462 return 0;
3463 }
3464 return 0;
3465 }
3466
3467 /* Print operand X (an rtx) in assembler syntax to file FILE.
3468 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
3469 For `%' followed by punctuation, CODE is the punctuation and X is null. */
3470
3471 void
3472 print_operand (file, x, code)
3473 FILE *file;
3474 rtx x;
3475 int code;
3476 {
3477 switch (code)
3478 {
3479 case '#':
3480 /* Output a 'nop' if there's nothing for the delay slot. */
3481 if (dbr_sequence_length () == 0)
3482 fputs ("\n\tnop", file);
3483 return;
3484 case '*':
3485 /* Output a nullification completer if there's nothing for the */
3486 /* delay slot or nullification is requested. */
3487 if (dbr_sequence_length () == 0 ||
3488 (final_sequence &&
3489 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
3490 fputs (",n", file);
3491 return;
3492 case 'R':
3493 /* Print out the second register name of a register pair.
3494 I.e., R (6) => 7. */
3495 fputs (reg_names[REGNO (x)+1], file);
3496 return;
3497 case 'r':
3498 /* A register or zero. */
3499 if (x == const0_rtx
3500 || (x == CONST0_RTX (DFmode))
3501 || (x == CONST0_RTX (SFmode)))
3502 {
3503 fputs ("0", file);
3504 return;
3505 }
3506 else
3507 break;
3508 case 'C': /* Plain (C)ondition */
3509 case 'X':
3510 switch (GET_CODE (x))
3511 {
3512 case EQ:
3513 fputs ("=", file); break;
3514 case NE:
3515 fputs ("<>", file); break;
3516 case GT:
3517 fputs (">", file); break;
3518 case GE:
3519 fputs (">=", file); break;
3520 case GEU:
3521 fputs (">>=", file); break;
3522 case GTU:
3523 fputs (">>", file); break;
3524 case LT:
3525 fputs ("<", file); break;
3526 case LE:
3527 fputs ("<=", file); break;
3528 case LEU:
3529 fputs ("<<=", file); break;
3530 case LTU:
3531 fputs ("<<", file); break;
3532 default:
3533 abort ();
3534 }
3535 return;
3536 case 'N': /* Condition, (N)egated */
3537 switch (GET_CODE (x))
3538 {
3539 case EQ:
3540 fputs ("<>", file); break;
3541 case NE:
3542 fputs ("=", file); break;
3543 case GT:
3544 fputs ("<=", file); break;
3545 case GE:
3546 fputs ("<", file); break;
3547 case GEU:
3548 fputs ("<<", file); break;
3549 case GTU:
3550 fputs ("<<=", file); break;
3551 case LT:
3552 fputs (">=", file); break;
3553 case LE:
3554 fputs (">", file); break;
3555 case LEU:
3556 fputs (">>", file); break;
3557 case LTU:
3558 fputs (">>=", file); break;
3559 default:
3560 abort ();
3561 }
3562 return;
3563 /* For floating point comparisons. Need special conditions to deal
3564 with NaNs properly. */
3565 case 'Y':
3566 switch (GET_CODE (x))
3567 {
3568 case EQ:
3569 fputs ("!=", file); break;
3570 case NE:
3571 fputs ("=", file); break;
3572 case GT:
3573 fputs ("<=", file); break;
3574 case GE:
3575 fputs ("<", file); break;
3576 case LT:
3577 fputs (">=", file); break;
3578 case LE:
3579 fputs (">", file); break;
3580 default:
3581 abort ();
3582 }
3583 return;
3584 case 'S': /* Condition, operands are (S)wapped. */
3585 switch (GET_CODE (x))
3586 {
3587 case EQ:
3588 fputs ("=", file); break;
3589 case NE:
3590 fputs ("<>", file); break;
3591 case GT:
3592 fputs ("<", file); break;
3593 case GE:
3594 fputs ("<=", file); break;
3595 case GEU:
3596 fputs ("<<=", file); break;
3597 case GTU:
3598 fputs ("<<", file); break;
3599 case LT:
3600 fputs (">", file); break;
3601 case LE:
3602 fputs (">=", file); break;
3603 case LEU:
3604 fputs (">>=", file); break;
3605 case LTU:
3606 fputs (">>", file); break;
3607 default:
3608 abort ();
3609 }
3610 return;
3611 case 'B': /* Condition, (B)oth swapped and negate. */
3612 switch (GET_CODE (x))
3613 {
3614 case EQ:
3615 fputs ("<>", file); break;
3616 case NE:
3617 fputs ("=", file); break;
3618 case GT:
3619 fputs (">=", file); break;
3620 case GE:
3621 fputs (">", file); break;
3622 case GEU:
3623 fputs (">>", file); break;
3624 case GTU:
3625 fputs (">>=", file); break;
3626 case LT:
3627 fputs ("<=", file); break;
3628 case LE:
3629 fputs ("<", file); break;
3630 case LEU:
3631 fputs ("<<", file); break;
3632 case LTU:
3633 fputs ("<<=", file); break;
3634 default:
3635 abort ();
3636 }
3637 return;
3638 case 'k':
3639 if (GET_CODE (x) == CONST_INT)
3640 {
3641 fprintf (file, "%d", ~INTVAL (x));
3642 return;
3643 }
3644 abort();
3645 case 'L':
3646 if (GET_CODE (x) == CONST_INT)
3647 {
3648 fprintf (file, "%d", 32 - (INTVAL (x) & 31));
3649 return;
3650 }
3651 abort();
3652 case 'O':
3653 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
3654 {
3655 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3656 return;
3657 }
3658 abort();
3659 case 'P':
3660 if (GET_CODE (x) == CONST_INT)
3661 {
3662 fprintf (file, "%d", 31 - (INTVAL (x) & 31));
3663 return;
3664 }
3665 abort();
3666 case 'I':
3667 if (GET_CODE (x) == CONST_INT)
3668 fputs ("i", file);
3669 return;
3670 case 'M':
3671 case 'F':
3672 switch (GET_CODE (XEXP (x, 0)))
3673 {
3674 case PRE_DEC:
3675 case PRE_INC:
3676 fputs ("s,mb", file);
3677 break;
3678 case POST_DEC:
3679 case POST_INC:
3680 fputs ("s,ma", file);
3681 break;
3682 case PLUS:
3683 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3684 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3685 fputs ("x,s", file);
3686 else if (code == 'F')
3687 fputs ("s", file);
3688 break;
3689 default:
3690 if (code == 'F')
3691 fputs ("s", file);
3692 break;
3693 }
3694 return;
3695 case 'G':
3696 output_global_address (file, x, 0);
3697 return;
3698 case 'H':
3699 output_global_address (file, x, 1);
3700 return;
3701 case 0: /* Don't do anything special */
3702 break;
3703 case 'Z':
3704 {
3705 unsigned op[3];
3706 compute_zdepi_operands (INTVAL (x), op);
3707 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
3708 return;
3709 }
3710 default:
3711 abort ();
3712 }
3713 if (GET_CODE (x) == REG)
3714 {
3715 fputs (reg_names [REGNO (x)], file);
3716 if (FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4 && (REGNO (x) & 1) == 0)
3717 fputs ("L", file);
3718 }
3719 else if (GET_CODE (x) == MEM)
3720 {
3721 int size = GET_MODE_SIZE (GET_MODE (x));
3722 rtx base = XEXP (XEXP (x, 0), 0);
3723 switch (GET_CODE (XEXP (x, 0)))
3724 {
3725 case PRE_DEC:
3726 case POST_DEC:
3727 fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]);
3728 break;
3729 case PRE_INC:
3730 case POST_INC:
3731 fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]);
3732 break;
3733 default:
3734 if (GET_CODE (XEXP (x, 0)) == PLUS
3735 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
3736 fprintf (file, "%s(0,%s)",
3737 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
3738 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
3739 else if (GET_CODE (XEXP (x, 0)) == PLUS
3740 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
3741 fprintf (file, "%s(0,%s)",
3742 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
3743 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
3744 else
3745 output_address (XEXP (x, 0));
3746 break;
3747 }
3748 }
3749 else
3750 output_addr_const (file, x);
3751 }
3752
3753 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
3754
3755 void
3756 output_global_address (file, x, round_constant)
3757 FILE *file;
3758 rtx x;
3759 int round_constant;
3760 {
3761
3762 /* Imagine (high (const (plus ...))). */
3763 if (GET_CODE (x) == HIGH)
3764 x = XEXP (x, 0);
3765
3766 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x))
3767 assemble_name (file, XSTR (x, 0));
3768 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
3769 {
3770 assemble_name (file, XSTR (x, 0));
3771 fputs ("-$global$", file);
3772 }
3773 else if (GET_CODE (x) == CONST)
3774 {
3775 char *sep = "";
3776 int offset = 0; /* assembler wants -$global$ at end */
3777 rtx base;
3778
3779 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3780 {
3781 base = XEXP (XEXP (x, 0), 0);
3782 output_addr_const (file, base);
3783 }
3784 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
3785 offset = INTVAL (XEXP (XEXP (x, 0), 0));
3786 else abort ();
3787
3788 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
3789 {
3790 base = XEXP (XEXP (x, 0), 1);
3791 output_addr_const (file, base);
3792 }
3793 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3794 offset = INTVAL (XEXP (XEXP (x, 0), 1));
3795 else abort ();
3796
3797 /* How bogus. The compiler is apparently responsible for
3798 rounding the constant if it uses an LR field selector.
3799
3800 The linker and/or assembler seem a better place since
3801 they have to do this kind of thing already.
3802
3803 If we fail to do this, HP's optimizing linker may eliminate
3804 an addil, but not update the ldw/stw/ldo instruction that
3805 uses the result of the addil. */
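/* A worked example (illustrative, not from the original sources):
the expression below rounds OFFSET to the nearest multiple of 0x2000,
with ties rounding up. So 0x2fff rounds to 0x2000, 0x3000 rounds to
0x4000, and 0x1000 rounds to 0x2000. */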
3806 if (round_constant)
3807 offset = ((offset + 0x1000) & ~0x1fff);
3808
3809 if (GET_CODE (XEXP (x, 0)) == PLUS)
3810 {
3811 if (offset < 0)
3812 {
3813 offset = -offset;
3814 sep = "-";
3815 }
3816 else
3817 sep = "+";
3818 }
3819 else if (GET_CODE (XEXP (x, 0)) == MINUS
3820 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3821 sep = "-";
3822 else abort ();
3823
3824 if (!read_only_operand (base) && !flag_pic)
3825 fputs ("-$global$", file);
3826 if (offset)
3827 fprintf (file, "%s%d", sep, offset);
3828 }
3829 else
3830 output_addr_const (file, x);
3831 }
3832
3833 /* HP's millicode routines mean something special to the assembler.
3834 Keep track of which ones we have used. */
3835
3836 enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
3837 static char imported[(int)end1000];
3838 static char *milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
3839 static char import_string[] = ".IMPORT $$....,MILLICODE";
3840 #define MILLI_START 10
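
/* A worked example of the string surgery in import_milli below
(illustrative only): MILLI_START is the offset of the "...." field in
import_string, so for code == mulI the strncpy turns the template into

.IMPORT $$mulI,MILLICODE

This only works because every name in milli_names is exactly four
characters long. */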
3841
3842 static void
3843 import_milli (code)
3844 enum millicodes code;
3845 {
3846 char str[sizeof (import_string)];
3847
3848 if (!imported[(int)code])
3849 {
3850 imported[(int)code] = 1;
3851 strcpy (str, import_string);
3852 strncpy (str + MILLI_START, milli_names[(int)code], 4);
3853 output_asm_insn (str, 0);
3854 }
3855 }
3856
3857 /* The register constraints have put the operands and return value in
3858 the proper registers. */
3859
3860 char *
3861 output_mul_insn (unsignedp, insn)
3862 int unsignedp;
3863 rtx insn;
3864 {
3865 import_milli (mulI);
3866 return output_millicode_call (insn, gen_rtx (SYMBOL_REF, SImode, "$$mulI"));
3867 }
3868
3869 /* Emit the rtl for doing a division by a constant. */
3870
3871 /* Do magic division millicodes exist for this value? */
3872 static int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
3873 1, 1};
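
/* Reading the table above: a 1 at index N means the millicode entry
points $$divI_N and $$divU_N exist, i.e. for N in {3, 5, 6, 7, 9, 10,
12, 14, 15}. Powers of two are absent, presumably because those
divisions are done with shifts instead. */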
3874
3875 /* We'll use an array to keep track of the magic millicodes and
3876 whether or not we've used them already. [n][0] is signed, [n][1] is
3877 unsigned. */
3878
3879 static int div_milli[16][2];
3880
3881 int
3882 div_operand (op, mode)
3883 rtx op;
3884 enum machine_mode mode;
3885 {
3886 return (mode == SImode
3887 && ((GET_CODE (op) == REG && REGNO (op) == 25)
3888 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
3889 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
3890 }
3891
3892 int
3893 emit_hpdiv_const (operands, unsignedp)
3894 rtx *operands;
3895 int unsignedp;
3896 {
3897 if (GET_CODE (operands[2]) == CONST_INT
3898 && INTVAL (operands[2]) > 0
3899 && INTVAL (operands[2]) < 16
3900 && magic_milli[INTVAL (operands[2])])
3901 {
3902 emit_move_insn (gen_rtx (REG, SImode, 26), operands[1]);
3903 emit
3904 (gen_rtx
3905 (PARALLEL, VOIDmode,
3906 gen_rtvec (5, gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 29),
3907 gen_rtx (unsignedp ? UDIV : DIV, SImode,
3908 gen_rtx (REG, SImode, 26),
3909 operands[2])),
3910 gen_rtx (CLOBBER, VOIDmode, operands[3]),
3911 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 26)),
3912 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 25)),
3913 gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 31)))));
3914 emit_move_insn (operands[0], gen_rtx (REG, SImode, 29));
3915 return 1;
3916 }
3917 return 0;
3918 }
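
/* An illustrative sketch (hypothetical operands) of the rtl the
function above emits for an unsigned division y / 10, where SCRATCH
stands for the clobber supplied by the pattern as operands[3]:

(set (reg:SI 26) y)
(parallel [(set (reg:SI 29) (udiv:SI (reg:SI 26) (const_int 10)))
(clobber SCRATCH)
(clobber (reg:SI 26))
(clobber (reg:SI 25))
(clobber (reg:SI 31))])
(set x (reg:SI 29))

That is, the millicode calling convention is modeled as hard register
moves plus clobbers of the registers the millicode routine destroys. */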
3919
3920 char *
3921 output_div_insn (operands, unsignedp, insn)
3922 rtx *operands;
3923 int unsignedp;
3924 rtx insn;
3925 {
3926 int divisor;
3927
3928 /* If the divisor is a constant, try to use one of the special
3929 opcodes. */
3930 if (GET_CODE (operands[0]) == CONST_INT)
3931 {
3932 static char buf[100];
3933 divisor = INTVAL (operands[0]);
3934 if (!div_milli[divisor][unsignedp])
3935 {
3936 div_milli[divisor][unsignedp] = 1;
3937 if (unsignedp)
3938 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
3939 else
3940 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
3941 }
3942 if (unsignedp)
3943 {
3944 sprintf (buf, "$$divU_%d", INTVAL (operands[0]));
3945 return output_millicode_call (insn,
3946 gen_rtx (SYMBOL_REF, SImode, buf));
3947 }
3948 else
3949 {
3950 sprintf (buf, "$$divI_%d", INTVAL (operands[0]));
3951 return output_millicode_call (insn,
3952 gen_rtx (SYMBOL_REF, SImode, buf));
3953 }
3954 }
3955 /* Divisor isn't a special constant. */
3956 else
3957 {
3958 if (unsignedp)
3959 {
3960 import_milli (divU);
3961 return output_millicode_call (insn,
3962 gen_rtx (SYMBOL_REF, SImode, "$$divU"));
3963 }
3964 else
3965 {
3966 import_milli (divI);
3967 return output_millicode_call (insn,
3968 gen_rtx (SYMBOL_REF, SImode, "$$divI"));
3969 }
3970 }
3971 }
3972
3973 /* Output a $$rem millicode to do mod. */
3974
3975 char *
3976 output_mod_insn (unsignedp, insn)
3977 int unsignedp;
3978 rtx insn;
3979 {
3980 if (unsignedp)
3981 {
3982 import_milli (remU);
3983 return output_millicode_call (insn,
3984 gen_rtx (SYMBOL_REF, SImode, "$$remU"));
3985 }
3986 else
3987 {
3988 import_milli (remI);
3989 return output_millicode_call (insn,
3990 gen_rtx (SYMBOL_REF, SImode, "$$remI"));
3991 }
3992 }
3993
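/* Output a .CALL pseudo-op describing where the arguments of
CALL_INSN live so the linker can insert argument relocation stubs.
As a hypothetical example (the exact words depend on the argument
registers actually used), a call passing an int and a float might
produce

.CALL ARGW0=GR,ARGW1=FR */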
3994 void
3995 output_arg_descriptor (call_insn)
3996 rtx call_insn;
3997 {
3998 char *arg_regs[4];
3999 enum machine_mode arg_mode;
4000 rtx link;
4001 int i, output_flag = 0;
4002 int regno;
4003
4004 for (i = 0; i < 4; i++)
4005 arg_regs[i] = 0;
4006
4007 /* Specify explicitly that no argument relocations should take place
4008 if using the portable runtime calling conventions. */
4009 if (TARGET_PORTABLE_RUNTIME)
4010 {
4011 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
4012 asm_out_file);
4013 return;
4014 }
4015
4016 if (GET_CODE (call_insn) != CALL_INSN)
4017 abort ();
4018 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
4019 {
4020 rtx use = XEXP (link, 0);
4021
4022 if (! (GET_CODE (use) == USE
4023 && GET_CODE (XEXP (use, 0)) == REG
4024 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
4025 continue;
4026
4027 arg_mode = GET_MODE (XEXP (use, 0));
4028 regno = REGNO (XEXP (use, 0));
4029 if (regno >= 23 && regno <= 26)
4030 {
4031 arg_regs[26 - regno] = "GR";
4032 if (arg_mode == DImode)
4033 arg_regs[25 - regno] = "GR";
4034 }
4035 else if (regno >= 32 && regno <= 39)
4036 {
4037 if (arg_mode == SFmode)
4038 arg_regs[(regno - 32) / 2] = "FR";
4039 else
4040 {
4041 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
4042 arg_regs[(regno - 34) / 2] = "FR";
4043 arg_regs[(regno - 34) / 2 + 1] = "FU";
4044 #else
4045 arg_regs[(regno - 34) / 2] = "FU";
4046 arg_regs[(regno - 34) / 2 + 1] = "FR";
4047 #endif
4048 }
4049 }
4050 }
4051 fputs ("\t.CALL ", asm_out_file);
4052 for (i = 0; i < 4; i++)
4053 {
4054 if (arg_regs[i])
4055 {
4056 if (output_flag++)
4057 fputc (',', asm_out_file);
4058 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
4059 }
4060 }
4061 fputc ('\n', asm_out_file);
4062 }
4063 \f
4064 /* Return the class of any secondary reload register that is needed to
4065 move IN into a register in class CLASS using mode MODE.
4066
4067 Profiling has shown that this routine and its descendants account for
4068 a significant amount of compile time (~7%). So it has been
4069 optimized to reduce redundant computations and eliminate useless
4070 function calls.
4071
4072 It might be worthwhile to try to make this a leaf function too. */
4073
4074 enum reg_class
4075 secondary_reload_class (class, mode, in)
4076 enum reg_class class;
4077 enum machine_mode mode;
4078 rtx in;
4079 {
4080 int regno, is_symbolic;
4081
4082 /* Trying to load a constant into a FP register during PIC code
4083 generation will require %r1 as a scratch register. */
4084 if (flag_pic == 2
4085 && GET_MODE_CLASS (mode) == MODE_INT
4086 && FP_REG_CLASS_P (class)
4087 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
4088 return R1_REGS;
4089
4090 /* Profiling showed the PA port spends about 1.3% of its compilation
4091 time in true_regnum from calls inside secondary_reload_class. */
4092
4093 if (GET_CODE (in) == REG)
4094 {
4095 regno = REGNO (in);
4096 if (regno >= FIRST_PSEUDO_REGISTER)
4097 regno = true_regnum (in);
4098 }
4099 else if (GET_CODE (in) == SUBREG)
4100 regno = true_regnum (in);
4101 else
4102 regno = -1;
4103
4104 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
4105 && GET_MODE_CLASS (mode) == MODE_INT
4106 && FP_REG_CLASS_P (class))
4107 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
4108 return GENERAL_REGS;
4109
4110 if (GET_CODE (in) == HIGH)
4111 in = XEXP (in, 0);
4112
4113 /* Profiling has shown that GCC spends about 2.6% of its compilation
4114 time in symbolic_operand from calls inside secondary_reload_class.
4115
4116 We use an inline copy and only compute its return value once to avoid
4117 useless work. */
4118 switch (GET_CODE (in))
4119 {
4120 rtx tmp;
4121
4122 case SYMBOL_REF:
4123 case LABEL_REF:
4124 is_symbolic = 1;
4125 break;
4126 case CONST:
4127 tmp = XEXP (in, 0);
4128 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
4129 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
4130 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
4131 break;
4132 default:
4133 is_symbolic = 0;
4134 break;
4135 }
4136
4137 if (!flag_pic
4138 && is_symbolic
4139 && read_only_operand (in))
4140 return NO_REGS;
4141
4142 if (class != R1_REGS && is_symbolic)
4143 return R1_REGS;
4144
4145 return NO_REGS;
4146 }
4147
4148 enum direction
4149 function_arg_padding (mode, type)
4150 enum machine_mode mode;
4151 tree type;
4152 {
4153 int size;
4154
4155 if (mode == BLKmode)
4156 {
4157 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
4158 size = int_size_in_bytes (type) * BITS_PER_UNIT;
4159 else
4160 return upward; /* Don't know if this is right, but it
4161 matches the old definition. */
4162 }
4163 else
4164 size = GET_MODE_BITSIZE (mode);
4165 if (size < PARM_BOUNDARY)
4166 return downward;
4167 else if (size % PARM_BOUNDARY)
4168 return upward;
4169 else
4170 return none;
4171 }
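
/* Worked examples for the function above, assuming PARM_BOUNDARY is
32 on this port: an 8-bit QImode argument pads downward, a 6-byte
BLKmode aggregate (48 bits) pads upward, and a 96-bit aggregate needs
no padding at all. */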
4172
4173 \f
4174 /* Do what is necessary for `va_start'. The argument is ignored;
4175 we look at the current function to determine if stdargs or varargs
4176 is used and fill in an initial va_list. A pointer to this constructor
4177 is returned. */
4178
4179 struct rtx_def *
4180 hppa_builtin_saveregs (arglist)
4181 tree arglist;
4182 {
4183 rtx offset;
4184 tree fntype = TREE_TYPE (current_function_decl);
4185 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
4186 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4187 != void_type_node)))
4188 ? UNITS_PER_WORD : 0);
4189
4190 if (argadj)
4191 offset = plus_constant (current_function_arg_offset_rtx, argadj);
4192 else
4193 offset = current_function_arg_offset_rtx;
4194
4195 /* Store general registers on the stack. */
4196 move_block_from_reg (23,
4197 gen_rtx (MEM, BLKmode,
4198 plus_constant
4199 (current_function_internal_arg_pointer, -16)),
4200 4, 4 * UNITS_PER_WORD);
4201 return copy_to_reg (expand_binop (Pmode, add_optab,
4202 current_function_internal_arg_pointer,
4203 offset, 0, 0, OPTAB_LIB_WIDEN));
4204 }
4205
4206 /* This routine handles all the normal conditional branch sequences we
4207 might need to generate. It handles compare immediate vs compare
4208 register, nullification of delay slots, varying length branches,
4209 negated branches, and all combinations of the above. It returns the
4210 output appropriate to emit the branch corresponding to all given
4211 parameters. */
4212
4213 char *
4214 output_cbranch (operands, nullify, length, negated, insn)
4215 rtx *operands;
4216 int nullify, length, negated;
4217 rtx insn;
4218 {
4219 static char buf[100];
4220 int useskip = 0;
4221
4222 /* A conditional branch to the following instruction (e.g., the delay slot) is
4223 asking for a disaster. This can happen when not optimizing.
4224
4225 In such cases it is safe to emit nothing. */
4226
4227 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4228 return "";
4229
4230 /* If this is a long branch with its delay slot unfilled, set `nullify'
4231 as it can nullify the delay slot and save a nop. */
4232 if (length == 8 && dbr_sequence_length () == 0)
4233 nullify = 1;
4234
4235 /* If this is a short forward conditional branch which did not get
4236 its delay slot filled, the delay slot can still be nullified. */
4237 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4238 nullify = forward_branch_p (insn);
4239
4240 /* A forward branch over a single nullified insn can be done with a
4241 comclr instruction. This avoids a single cycle penalty due to
4242 mis-predicted branch if we fall through (branch not taken). */
4243 if (length == 4
4244 && next_real_insn (insn) != 0
4245 && get_attr_length (next_real_insn (insn)) == 4
4246 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4247 && nullify)
4248 useskip = 1;
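
/* An illustrative expansion of the comclr trick below (the registers
here are hypothetical): instead of

comb,=,n %r4,%r5,L$1
or %r6,%r7,%r8
L$1:

we emit

comclr,= %r4,%r5,0
or %r6,%r7,%r8

where the comclr nullifies the single skipped insn when the condition
holds, so no branch is ever taken or mis-predicted. */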
4249
4250 switch (length)
4251 {
4252 /* All short conditional branches except backwards with an unfilled
4253 delay slot. */
4254 case 4:
4255 if (useskip)
4256 strcpy (buf, "com%I2clr,");
4257 else
4258 strcpy (buf, "com%I2b,");
4259 if (negated)
4260 strcat (buf, "%B3");
4261 else
4262 strcat (buf, "%S3");
4263 if (useskip)
4264 strcat (buf, " %2,%1,0");
4265 else if (nullify)
4266 strcat (buf, ",n %2,%1,%0");
4267 else
4268 strcat (buf, " %2,%1,%0");
4269 break;
4270
4271 /* All long conditionals. Note a short backward branch with an
4272 unfilled delay slot is treated just like a long backward branch
4273 with an unfilled delay slot. */
4274 case 8:
4275 /* Handle weird backwards branch with a filled delay slot
4276 which is nullified. */
4277 if (dbr_sequence_length () != 0
4278 && ! forward_branch_p (insn)
4279 && nullify)
4280 {
4281 strcpy (buf, "com%I2b,");
4282 if (negated)
4283 strcat (buf, "%S3");
4284 else
4285 strcat (buf, "%B3");
4286 strcat (buf, ",n %2,%1,.+12\n\tbl %0,0");
4287 }
4288 /* Handle short backwards branch with an unfilled delay slot.
4289 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
4290 taken and untaken branches. */
4291 else if (dbr_sequence_length () == 0
4292 && ! forward_branch_p (insn)
4293 && insn_addresses
4294 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4295 - insn_addresses[INSN_UID (insn)] - 8))
4296 {
4297 strcpy (buf, "com%I2b,");
4298 if (negated)
4299 strcat (buf, "%B3 %2,%1,%0%#");
4300 else
4301 strcat (buf, "%S3 %2,%1,%0%#");
4302 }
4303 else
4304 {
4305 strcpy (buf, "com%I2clr,");
4306 if (negated)
4307 strcat (buf, "%S3");
4308 else
4309 strcat (buf, "%B3");
4310 if (nullify)
4311 strcat (buf, " %2,%1,0\n\tbl,n %0,0");
4312 else
4313 strcat (buf, " %2,%1,0\n\tbl %0,0");
4314 }
4315 break;
4316
4317 default:
4318 abort ();
4319 }
4320 return buf;
4321 }
4322
4323 /* This routine handles all the branch-on-bit conditional branch sequences we
4324 might need to generate. It handles nullification of delay slots,
4325 varying length branches, negated branches and all combinations of the
4326 above. It returns the appropriate output template to emit the branch. */
4327
4328 char *
4329 output_bb (operands, nullify, length, negated, insn, which)
4330 rtx *operands;
4331 int nullify, length, negated;
4332 rtx insn;
4333 int which;
4334 {
4335 static char buf[100];
4336 int useskip = 0;
4337
4338 /* A conditional branch to the following instruction (e.g., the delay slot) is
4339 asking for a disaster. I do not think this can happen as this pattern
4340 is only used when optimizing; jump optimization should eliminate the
4341 jump. But be prepared just in case. */
4342
4343 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4344 return "";
4345
4346 /* If this is a long branch with its delay slot unfilled, set `nullify'
4347 as it can nullify the delay slot and save a nop. */
4348 if (length == 8 && dbr_sequence_length () == 0)
4349 nullify = 1;
4350
4351 /* If this is a short forward conditional branch which did not get
4352 its delay slot filled, the delay slot can still be nullified. */
4353 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4354 nullify = forward_branch_p (insn);
4355
4356 /* A forward branch over a single nullified insn can be done with a
4357 extrs instruction. This avoids a single cycle penalty due to
4358 mis-predicted branch if we fall through (branch not taken). */
4359
4360 if (length == 4
4361 && next_real_insn (insn) != 0
4362 && get_attr_length (next_real_insn (insn)) == 4
4363 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4364 && nullify)
4365 useskip = 1;
4366
4367 switch (length)
4368 {
4369
4370 /* All short conditional branches except backwards with an unfilled
4371 delay slot. */
4372 case 4:
4373 if (useskip)
4374 strcpy (buf, "extrs,");
4375 else
4376 strcpy (buf, "bb,");
4377 if ((which == 0 && negated)
4378 || (which == 1 && ! negated))
4379 strcat (buf, ">=");
4380 else
4381 strcat (buf, "<");
4382 if (useskip)
4383 strcat (buf, " %0,%1,1,0");
4384 else if (nullify && negated)
4385 strcat (buf, ",n %0,%1,%3");
4386 else if (nullify && ! negated)
4387 strcat (buf, ",n %0,%1,%2");
4388 else if (! nullify && negated)
4389 strcat (buf, " %0,%1,%3");
4390 else if (! nullify && ! negated)
4391 strcat (buf, " %0,%1,%2");
4392 break;
4393
4394 /* All long conditionals. Note a short backward branch with an
4395 unfilled delay slot is treated just like a long backward branch
4396 with an unfilled delay slot. */
4397 case 8:
4398 /* Handle weird backwards branch with a filled delay slot
4399 which is nullified. */
4400 if (dbr_sequence_length () != 0
4401 && ! forward_branch_p (insn)
4402 && nullify)
4403 {
4404 strcpy (buf, "bb,");
4405 if ((which == 0 && negated)
4406 || (which == 1 && ! negated))
4407 strcat (buf, "<");
4408 else
4409 strcat (buf, ">=");
4410 if (negated)
4411 strcat (buf, ",n %0,%1,.+12\n\tbl %3,0");
4412 else
4413 strcat (buf, ",n %0,%1,.+12\n\tbl %2,0");
4414 }
4415 /* Handle short backwards branch with an unfilled delay slot.
4416 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4417 taken and untaken branches. */
4418 else if (dbr_sequence_length () == 0
4419 && ! forward_branch_p (insn)
4420 && insn_addresses
4421 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4422 - insn_addresses[INSN_UID (insn)] - 8))
4423 {
4424 strcpy (buf, "bb,");
4425 if ((which == 0 && negated)
4426 || (which == 1 && ! negated))
4427 strcat (buf, ">=");
4428 else
4429 strcat (buf, "<");
4430 if (negated)
4431 strcat (buf, " %0,%1,%3%#");
4432 else
4433 strcat (buf, " %0,%1,%2%#");
4434 }
4435 else
4436 {
4437 strcpy (buf, "extrs,");
4438 if ((which == 0 && negated)
4439 || (which == 1 && ! negated))
4440 strcat (buf, "<");
4441 else
4442 strcat (buf, ">=");
4443 if (nullify && negated)
4444 strcat (buf, " %0,%1,1,0\n\tbl,n %3,0");
4445 else if (nullify && ! negated)
4446 strcat (buf, " %0,%1,1,0\n\tbl,n %2,0");
4447 else if (negated)
4448 strcat (buf, " %0,%1,1,0\n\tbl %3,0");
4449 else
4450 strcat (buf, " %0,%1,1,0\n\tbl %2,0");
4451 }
4452 break;
4453
4454 default:
4455 abort ();
4456 }
4457 return buf;
4458 }
4459
4460 /* This routine handles all the branch-on-variable-bit conditional branch
4461 sequences we might need to generate. It handles nullification of delay
4462 slots, varying length branches, negated branches and all combinations
4463 of the above. It returns the appropriate output template to emit the
4464 branch. */
4465
4466 char *
4467 output_bvb (operands, nullify, length, negated, insn, which)
4468 rtx *operands;
4469 int nullify, length, negated;
4470 rtx insn;
4471 int which;
4472 {
4473 static char buf[100];
4474 int useskip = 0;
4475
4476 /* A conditional branch to the following instruction (e.g., the delay slot) is
4477 asking for a disaster. I do not think this can happen as this pattern
4478 is only used when optimizing; jump optimization should eliminate the
4479 jump. But be prepared just in case. */
4480
4481 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4482 return "";
4483
4484 /* If this is a long branch with its delay slot unfilled, set `nullify'
4485 as it can nullify the delay slot and save a nop. */
4486 if (length == 8 && dbr_sequence_length () == 0)
4487 nullify = 1;
4488
4489 /* If this is a short forward conditional branch which did not get
4490 its delay slot filled, the delay slot can still be nullified. */
4491 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4492 nullify = forward_branch_p (insn);
4493
4494 /* A forward branch over a single nullified insn can be done with a
4495 extrs instruction. This avoids a single cycle penalty due to
4496 mis-predicted branch if we fall through (branch not taken). */
4497
4498 if (length == 4
4499 && next_real_insn (insn) != 0
4500 && get_attr_length (next_real_insn (insn)) == 4
4501 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4502 && nullify)
4503 useskip = 1;
4504
4505 switch (length)
4506 {
4507
4508 /* All short conditional branches except backwards with an unfilled
4509 delay slot. */
4510 case 4:
4511 if (useskip)
4512 strcpy (buf, "vextrs,");
4513 else
4514 strcpy (buf, "bvb,");
4515 if ((which == 0 && negated)
4516 || (which == 1 && ! negated))
4517 strcat (buf, ">=");
4518 else
4519 strcat (buf, "<");
4520 if (useskip)
4521 strcat (buf, " %0,1,0");
4522 else if (nullify && negated)
4523 strcat (buf, ",n %0,%3");
4524 else if (nullify && ! negated)
4525 strcat (buf, ",n %0,%2");
4526 else if (! nullify && negated)
4527 strcat (buf, " %0,%3");
4528 else if (! nullify && ! negated)
4529 strcat (buf, " %0,%2");
4530 break;
4531
4532 /* All long conditionals. Note a short backward branch with an
4533 unfilled delay slot is treated just like a long backward branch
4534 with an unfilled delay slot. */
4535 case 8:
4536 /* Handle weird backwards branch with a filled delay slot
4537 which is nullified. */
4538 if (dbr_sequence_length () != 0
4539 && ! forward_branch_p (insn)
4540 && nullify)
4541 {
4542 strcpy (buf, "bvb,");
4543 if ((which == 0 && negated)
4544 || (which == 1 && ! negated))
4545 strcat (buf, "<");
4546 else
4547 strcat (buf, ">=");
4548 if (negated)
4549 strcat (buf, ",n %0,.+12\n\tbl %3,0");
4550 else
4551 strcat (buf, ",n %0,.+12\n\tbl %2,0");
4552 }
4553 /* Handle short backwards branch with an unfilled delay slot.
4554 Using a bb;nop rather than extrs;bl saves 1 cycle for both
4555 taken and untaken branches. */
4556 else if (dbr_sequence_length () == 0
4557 && ! forward_branch_p (insn)
4558 && insn_addresses
4559 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4560 - insn_addresses[INSN_UID (insn)] - 8))
4561 {
4562 strcpy (buf, "bvb,");
4563 if ((which == 0 && negated)
4564 || (which == 1 && ! negated))
4565 strcat (buf, ">=");
4566 else
4567 strcat (buf, "<");
4568 if (negated)
4569 strcat (buf, " %0,%3%#");
4570 else
4571 strcat (buf, " %0,%2%#");
4572 }
4573 else
4574 {
4575 strcpy (buf, "vextrs,");
4576 if ((which == 0 && negated)
4577 || (which == 1 && ! negated))
4578 strcat (buf, "<");
4579 else
4580 strcat (buf, ">=");
4581 if (nullify && negated)
4582 strcat (buf, " %0,1,0\n\tbl,n %3,0");
4583 else if (nullify && ! negated)
4584 strcat (buf, " %0,1,0\n\tbl,n %2,0");
4585 else if (negated)
4586 strcat (buf, " %0,1,0\n\tbl %3,0");
4587 else
4588 strcat (buf, " %0,1,0\n\tbl %2,0");
4589 }
4590 break;
4591
4592 default:
4593 abort ();
4594 }
4595 return buf;
4596 }
4597
4598 /* Return the output template for emitting a dbra type insn.
4599
4600 Note it may perform some output operations on its own before
4601 returning the final output string. */
4602 char *
4603 output_dbra (operands, insn, which_alternative)
4604 rtx *operands;
4605 rtx insn;
4606 int which_alternative;
4607 {
4608
4609 /* A conditional branch to the following instruction (e.g., the delay slot) is
4610 asking for a disaster. Be prepared! */
4611
4612 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4613 {
4614 if (which_alternative == 0)
4615 return "ldo %1(%0),%0";
4616 else if (which_alternative == 1)
4617 {
4618 output_asm_insn ("fstws %0,-16(0,%%r30)",operands);
4619 output_asm_insn ("ldw -16(0,%%r30),%4",operands);
4620 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
4621 return "fldws -16(0,%%r30),%0";
4622 }
4623 else
4624 {
4625 output_asm_insn ("ldw %0,%4", operands);
4626 return "ldo %1(%4),%4\n\tstw %4,%0";
4627 }
4628 }
4629
4630 if (which_alternative == 0)
4631 {
4632 int nullify = INSN_ANNULLED_BRANCH_P (insn);
4633 int length = get_attr_length (insn);
4634
4635 /* If this is a long branch with its delay slot unfilled, set `nullify'
4636 as it can nullify the delay slot and save a nop. */
4637 if (length == 8 && dbr_sequence_length () == 0)
4638 nullify = 1;
4639
4640 /* If this is a short forward conditional branch which did not get
4641 its delay slot filled, the delay slot can still be nullified. */
4642 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4643 nullify = forward_branch_p (insn);
4644
4645 /* Handle short versions first. */
4646 if (length == 4 && nullify)
4647 return "addib,%C2,n %1,%0,%3";
4648 else if (length == 4 && ! nullify)
4649 return "addib,%C2 %1,%0,%3";
4650 else if (length == 8)
4651 {
4652 /* Handle weird backwards branch with a filled delay slot
4653 which is nullified. */
4654 if (dbr_sequence_length () != 0
4655 && ! forward_branch_p (insn)
4656 && nullify)
4657 return "addib,%N2,n %1,%0,.+12\n\tbl %3,0";
4658 /* Handle short backwards branch with an unfilled delay slot.
4659 Using a addb;nop rather than addi;bl saves 1 cycle for both
4660 taken and untaken branches. */
4661 else if (dbr_sequence_length () == 0
4662 && ! forward_branch_p (insn)
4663 && insn_addresses
4664 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4665 - insn_addresses[INSN_UID (insn)] - 8))
4666 return "addib,%C2 %1,%0,%3%#";
4667
4668 /* Handle normal cases. */
4669 if (nullify)
4670 return "addi,%N2 %1,%0,%0\n\tbl,n %3,0";
4671 else
4672 return "addi,%N2 %1,%0,%0\n\tbl %3,0";
4673 }
4674 else
4675 abort ();
4676 }
4677 /* Deal with gross reload from FP register case. */
4678 else if (which_alternative == 1)
4679 {
4680 /* Move loop counter from FP register to MEM then into a GR,
4681 increment the GR, store the GR into MEM, and finally reload
4682 the FP register from MEM from within the branch's delay slot. */
4683 output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands);
4684 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
4685 if (get_attr_length (insn) == 24)
4686 return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0";
4687 else
4688 return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
4689 }
4690 /* Deal with gross reload from memory case. */
4691 else
4692 {
4693 /* Reload loop counter from memory, the store back to memory
4694 happens in the branch's delay slot. */
4695 output_asm_insn ("ldw %0,%4", operands);
4696 if (get_attr_length (insn) == 12)
4697 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
4698 else
4699 return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0";
4700 }
4701 }
4702
4703 /* Return the output template for emitting a movb type insn.
4704
4705 Note it may perform some output operations on its own before
4706 returning the final output string. */
4707 char *
4708 output_movb (operands, insn, which_alternative, reverse_comparison)
4709 rtx *operands;
4710 rtx insn;
4711 int which_alternative;
4712 int reverse_comparison;
4713 {
4714
4715 /* A conditional branch to the following instruction (e.g., the delay slot) is
4716 asking for a disaster. Be prepared! */
4717
4718 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4719 {
4720 if (which_alternative == 0)
4721 return "copy %1,%0";
4722 else if (which_alternative == 1)
4723 {
4724 output_asm_insn ("stw %1,-16(0,%%r30)",operands);
4725 return "fldws -16(0,%%r30),%0";
4726 }
4727 else if (which_alternative == 2)
4728 return "stw %1,%0";
4729 else
4730 return "mtsar %r1";
4731 }
4732
4733 /* Support the second variant. */
4734 if (reverse_comparison)
4735 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
4736
4737 if (which_alternative == 0)
4738 {
4739 int nullify = INSN_ANNULLED_BRANCH_P (insn);
4740 int length = get_attr_length (insn);
4741
4742 /* If this is a long branch with its delay slot unfilled, set `nullify'
4743 as it can nullify the delay slot and save a nop. */
4744 if (length == 8 && dbr_sequence_length () == 0)
4745 nullify = 1;
4746
4747 /* If this is a short forward conditional branch which did not get
4748 its delay slot filled, the delay slot can still be nullified. */
4749 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4750 nullify = forward_branch_p (insn);
4751
4752 /* Handle short versions first. */
4753 if (length == 4 && nullify)
4754 return "movb,%C2,n %1,%0,%3";
4755 else if (length == 4 && ! nullify)
4756 return "movb,%C2 %1,%0,%3";
4757 else if (length == 8)
4758 {
4759 /* Handle weird backwards branch with a filled delay slot
4760 which is nullified. */
4761 if (dbr_sequence_length () != 0
4762 && ! forward_branch_p (insn)
4763 && nullify)
4764 return "movb,%N2,n %1,%0,.+12\n\tbl %3,0";
4765
4766 /* Handle short backwards branch with an unfilled delay slot.
4767 Using a movb;nop rather than or;bl saves 1 cycle for both
4768 taken and untaken branches. */
4769 else if (dbr_sequence_length () == 0
4770 && ! forward_branch_p (insn)
4771 && insn_addresses
4772 && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
4773 - insn_addresses[INSN_UID (insn)] - 8))
4774 return "movb,%C2 %1,%0,%3%#";
4775 /* Handle normal cases. */
4776 if (nullify)
4777 return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0";
4778 else
4779 return "or,%N2 %1,%%r0,%0\n\tbl %3,0";
4780 }
4781 else
4782 abort ();
4783 }
4784 /* Deal with gross reload from FP register case. */
4785 else if (which_alternative == 1)
4786 {
4787 /* Move loop counter from FP register to MEM then into a GR,
4788 increment the GR, store the GR into MEM, and finally reload
4789 the FP register from MEM from within the branch's delay slot. */
4790 output_asm_insn ("stw %1,-16(0,%%r30)",operands);
4791 if (get_attr_length (insn) == 12)
4792 return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0";
4793 else
4794 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
4795 }
4796 /* Deal with gross reload from memory case. */
4797 else if (which_alternative == 2)
4798 {
4799 /* Reload loop counter from memory, the store back to memory
4800 happens in the branch's delay slot. */
4801 if (get_attr_length (insn) == 8)
4802 return "comb,%S2 0,%1,%3\n\tstw %1,%0";
4803 else
4804 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0";
4805 }
4806 /* Handle SAR as a destination. */
4807 else
4808 {
4809 if (get_attr_length (insn) == 8)
4810 return "comb,%S2 0,%1,%3\n\tmtsar %r1";
4811 else
4812 return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tmtsar %r1";
4813 }
4814 }
4815
4816
4817 /* INSN is a millicode call. It may have an unconditional jump in its delay
4818 slot.
4819
4820 CALL_DEST is the routine we are calling. */
4821
4822 char *
4823 output_millicode_call (insn, call_dest)
4824 rtx insn;
4825 rtx call_dest;
4826 {
4827 int distance;
4828 rtx xoperands[4];
4829 rtx seq_insn;
4830
4831 /* Handle common case -- empty delay slot or no jump in the delay slot,
4832 and we're sure that the branch will reach the beginning of the $CODE$
4833 subspace. */
4834 if ((dbr_sequence_length () == 0
4835 && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
4836 || (dbr_sequence_length () != 0
4837 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
4838 && get_attr_length (insn) == 4))
4839 {
4840 xoperands[0] = call_dest;
4841 output_asm_insn ("bl %0,%%r31%#", xoperands);
4842 return "";
4843 }
4844
4845 /* This call may not reach the beginning of the $CODE$ subspace. */
4846 if (get_attr_length (insn) > 4)
4847 {
4848 int delay_insn_deleted = 0;
4849 rtx xoperands[2];
4850 rtx link;
4851
4852 /* We need to emit an inline long-call branch. */
4853 if (dbr_sequence_length () != 0
4854 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
4855 {
4856 /* A non-jump insn in the delay slot. By definition we can
4857 emit this insn before the call. */
4858 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
4859
4860 /* Now delete the delay insn. */
4861 PUT_CODE (NEXT_INSN (insn), NOTE);
4862 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
4863 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
4864 delay_insn_deleted = 1;
4865 }
4866
4867 /* If we're allowed to use be/ble instructions, then this is the
4868 best sequence to use for a long millicode call. */
4869 if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS
4870 || ! (flag_pic || TARGET_PORTABLE_RUNTIME))
4871 {
4872 xoperands[0] = call_dest;
4873 output_asm_insn ("ldil L%%%0,%%r31", xoperands);
4874 output_asm_insn ("ble R%%%0(%%sr4,%%r31)", xoperands);
4875 output_asm_insn ("nop", xoperands);
4876 }
4877 /* Pure portable runtime doesn't allow be/ble; we also don't have
4878 PIC support in the assembler/linker, so this sequence is needed. */
4879 else if (TARGET_PORTABLE_RUNTIME)
4880 {
4881 xoperands[0] = call_dest;
4882 /* Get the address of our target into %r29. */
4883 output_asm_insn ("ldil L%%%0,%%r29", xoperands);
4884 output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
4885
4886 /* Get our return address into %r31. */
4887 output_asm_insn ("blr 0,%%r31", xoperands);
4888
4889 /* Jump to our target address in %r29. */
4890 output_asm_insn ("bv,n 0(%%r29)", xoperands);
4891
4892 /* Empty delay slot. Note this insn gets fetched twice and
4893 executed once. To be safe we use a nop. */
4894 output_asm_insn ("nop", xoperands);
4895 return "";
4896 }
4897 /* PIC long millicode call sequence. */
4898 else
4899 {
4900 xoperands[0] = call_dest;
4901 xoperands[1] = gen_label_rtx ();
4902 /* Get our address + 8 into %r1. */
4903 output_asm_insn ("bl .+8,%%r1", xoperands);
4904
4905 /* Add %r1 to the offset of our target from the next insn. */
4906 output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
4907 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4908 CODE_LABEL_NUMBER (xoperands[1]));
4909 output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
4910
4911 /* Get the return address into %r31. */
4912 output_asm_insn ("blr 0,%%r31", xoperands);
4913
4914 /* Branch to our target which is in %r1. */
4915 output_asm_insn ("bv,n 0(%%r1)", xoperands);
4916
4917 /* Empty delay slot. Note this insn gets fetched twice and
4918 executed once. To be safe we use a nop. */
4919 output_asm_insn ("nop", xoperands);
4920 }
4921
4922 /* If we had a jump in the call's delay slot, output it now. */
4923 if (dbr_sequence_length () != 0
4924 && !delay_insn_deleted)
4925 {
4926 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
4927 output_asm_insn ("b,n %0", xoperands);
4928
4929 /* Now delete the delay insn. */
4930 PUT_CODE (NEXT_INSN (insn), NOTE);
4931 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
4932 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
4933 }
4934 return "";
4935 }
4936
4937 /* This call has an unconditional jump in its delay slot and the
4938 call is known to reach its target or the beginning of the current
4939 subspace. */
4940
4941 /* Use the containing sequence insn's address. */
4942 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
4943
4944 distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
4945 - insn_addresses[INSN_UID (seq_insn)] - 8;
4946
4947 /* If the branch was too far away, emit a normal call followed
4948 by a nop, followed by the unconditional branch.
4949
4950 If the branch is close, then adjust %r2 from within the
4951 call's delay slot. */
4952
4953 xoperands[0] = call_dest;
4954 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
4955 if (! VAL_14_BITS_P (distance))
4956 output_asm_insn ("bl %0,%%r31\n\tnop\n\tbl,n %1,%%r0", xoperands);
4957 else
4958 {
4959 xoperands[3] = gen_label_rtx ();
4960 output_asm_insn ("\n\tbl %0,%%r31\n\tldo %1-%3(%%r31),%%r31", xoperands);
4961 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4962 CODE_LABEL_NUMBER (xoperands[3]));
4963 }
4964
4965 /* Delete the jump. */
4966 PUT_CODE (NEXT_INSN (insn), NOTE);
4967 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
4968 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
4969 return "";
4970 }
4971
4972 /* INSN is a function call. It may have an unconditional jump
4973 in its delay slot.
4974
4975 CALL_DEST is the routine we are calling. */
4976
4977 char *
4978 output_call (insn, call_dest)
4979 rtx insn;
4980 rtx call_dest;
4981 {
4982 int distance;
4983 rtx xoperands[4];
4984 rtx seq_insn;
4985
4986 /* Handle common case -- empty delay slot or no jump in the delay slot,
4987 and we're sure that the branch will reach the beginning of the $CODE$
4988 subspace. */
4989 if ((dbr_sequence_length () == 0
4990 && get_attr_length (insn) == 8)
4991 || (dbr_sequence_length () != 0
4992 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
4993 && get_attr_length (insn) == 4))
4994 {
4995 xoperands[0] = call_dest;
4996 output_asm_insn ("bl %0,%%r2%#", xoperands);
4997 return "";
4998 }
4999
5000 /* This call may not reach the beginning of the $CODE$ subspace. */
5001 if (get_attr_length (insn) > 8)
5002 {
5003 int delay_insn_deleted = 0;
5004 rtx xoperands[2];
5005 rtx link;
5006
5007 /* We need to emit an inline long-call branch. Furthermore,
5008 because we're changing a named function call into an indirect
5009 function call well after the parameters have been set up, we
5010 need to make sure any FP args appear in both the integer
5011 and FP registers. Also, we need to move any delay slot insn
5012 out of the delay slot. And finally, we can't rely on the linker
5013 being able to fix the call to $$dyncall! -- Yuk! */
5014 if (dbr_sequence_length () != 0
5015 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
5016 {
5017 /* A non-jump insn in the delay slot. By definition we can
5018 emit this insn before the call (and in fact before argument
5019 relocating). */
5020 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
5021
5022 /* Now delete the delay insn. */
5023 PUT_CODE (NEXT_INSN (insn), NOTE);
5024 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5025 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5026 delay_insn_deleted = 1;
5027 }
5028
5029 /* Now copy any FP arguments into integer registers. */
5030 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
5031 {
5032 int arg_mode, regno;
5033 rtx use = XEXP (link, 0);
5034 if (! (GET_CODE (use) == USE
5035 && GET_CODE (XEXP (use, 0)) == REG
5036 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5037 continue;
5038
5039 arg_mode = GET_MODE (XEXP (use, 0));
5040 regno = REGNO (XEXP (use, 0));
5041 /* Is it a floating point register? */
5042 if (regno >= 32 && regno <= 39)
5043 {
5044 /* Copy from the FP register into an integer register
5045 (via memory). */
5046 if (arg_mode == SFmode)
5047 {
5048 xoperands[0] = XEXP (use, 0);
5049 xoperands[1] = gen_rtx (REG, SImode, 26 - (regno - 32) / 2);
5050 output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands);
5051 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5052 }
5053 else
5054 {
5055 xoperands[0] = XEXP (use, 0);
5056 xoperands[1] = gen_rtx (REG, DImode, 25 - (regno - 34) / 2);
5057 output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands);
5058 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
5059 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5060 }
5061
5062 }
5063 }
5064
5065 /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
5066 we don't have any direct calls in that case. */
5067 if (flag_pic)
5068 {
5069 /* We have to load the address of the function using a procedure
5070 label (plabel). The LP and RP relocs don't work reliably for PIC,
5071 so we make a plain 32 bit plabel in the data segment instead. We
5072 have to defer outputting it of course... Not pretty. */
5073
5074 xoperands[0] = gen_label_rtx ();
5075 xoperands[1] = gen_label_rtx ();
5076 output_asm_insn ("addil LT%%%0,%%r19", xoperands);
5077 output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
5078 output_asm_insn ("ldw 0(0,%%r22),%%r22", xoperands);
5079
5080 if (deferred_plabels == 0)
5081 deferred_plabels = (struct defer_plab *)
5082 xmalloc (1 * sizeof (struct defer_plab));
5083 else
5084 deferred_plabels = (struct defer_plab *)
5085 xrealloc (deferred_plabels,
5086 (n_deferred_plabels + 1) * sizeof (struct defer_plab));
5087 deferred_plabels[n_deferred_plabels].internal_label = xoperands[0];
5088 deferred_plabels[n_deferred_plabels].symbol = call_dest;
5089 n_deferred_plabels++;
5090
5091 /* Get our address + 8 into %r1. */
5092 output_asm_insn ("bl .+8,%%r1", xoperands);
5093
5094 /* Add %r1 to the offset of dyncall from the next insn. */
5095 output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
5096 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5097 CODE_LABEL_NUMBER (xoperands[1]));
5098 output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);
5099
5100 /* Get the return address into %r31. */
5101 output_asm_insn ("blr 0,%%r31", xoperands);
5102
5103 /* Branch to our target which is in %r1. */
5104 output_asm_insn ("bv 0(%%r1)", xoperands);
5105
5106 /* Copy the return address into %r2 also. */
5107 output_asm_insn ("copy %%r31,%%r2", xoperands);
5108 }
5109 else
5110 {
5111 /* No PIC stuff to worry about. We can use ldil;ble. */
5112 xoperands[0] = call_dest;
5113
5114 /* Get the address of our target into %r22. */
5115 output_asm_insn ("ldil LP%%%0,%%r22", xoperands);
5116 output_asm_insn ("ldo RP%%%0(%%r22),%%r22", xoperands);
5117
5118 /* Get the high part of the address of $$dyncall into %r2, then
5119 add in the low part in the branch instruction. */
5120 output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
5121 output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands);
5122
5123 /* Copy the return pointer into both %r31 and %r2. */
5124 output_asm_insn ("copy %%r31,%%r2", xoperands);
5125 }
5126
5127 /* If we had a jump in the call's delay slot, output it now. */
5128 if (dbr_sequence_length () != 0
5129 && !delay_insn_deleted)
5130 {
5131 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5132 output_asm_insn ("b,n %0", xoperands);
5133
5134 /* Now delete the delay insn. */
5135 PUT_CODE (NEXT_INSN (insn), NOTE);
5136 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5137 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5138 }
5139 return "";
5140 }
5141
5142 /* This call has an unconditional jump in its delay slot and the
5143 call is known to reach its target or the beginning of the current
5144 subspace. */
5145
5146 /* Use the containing sequence insn's address. */
5147 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
5148
5149 distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
5150 - insn_addresses[INSN_UID (seq_insn)] - 8;
5151
5152 /* If the branch was too far away, emit a normal call followed
5153 by a nop, followed by the unconditional branch.
5154
5155 If the branch is close, then adjust %r2 from within the
5156 call's delay slot. */
5157
5158 xoperands[0] = call_dest;
5159 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5160 if (! VAL_14_BITS_P (distance))
5161 output_asm_insn ("bl %0,%%r2\n\tnop\n\tbl,n %1,%%r0", xoperands);
5162 else
5163 {
5164 xoperands[3] = gen_label_rtx ();
5165 output_asm_insn ("\n\tbl %0,%%r2\n\tldo %1-%3(%%r2),%%r2", xoperands);
5166 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5167 CODE_LABEL_NUMBER (xoperands[3]));
5168 }
5169
5170 /* Delete the jump. */
5171 PUT_CODE (NEXT_INSN (insn), NOTE);
5172 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5173 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5174 return "";
5175 }
5176
5177 extern struct obstack permanent_obstack;
5178 extern struct obstack *saveable_obstack;
5179
5180 /* In HPUX 8.0's shared library scheme, special relocations are needed
5181 for function labels if they might be passed to a function
5182 in a shared library (because shared libraries don't live in code
5183 space), and special magic is needed to construct their address.
5184
5185 For reasons too disgusting to describe, storage for the new name
5186 is allocated either on the saveable_obstack (released at function
5187 exit) or on the permanent_obstack for things that can never change
5188 (libcall names for example). */
5189
5190 void
5191 hppa_encode_label (sym, permanent)
5192 rtx sym;
5193 int permanent;
5194 {
5195 char *str = XSTR (sym, 0);
5196 int len = strlen (str);
5197 char *newstr;
5198
5199 newstr = obstack_alloc ((permanent ? &permanent_obstack : saveable_obstack),
5200 len + 2);
5201
5202 if (str[0] == '*')
5203 *newstr++ = *str++;
5204 strcpy (newstr + 1, str);
5205 *newstr = '@';
5206 XSTR (sym, 0) = newstr;
5207 }
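
/* For example, the SYMBOL_REF name "foo" becomes "@foo"; the leading
'@' marks the symbol as a function label, which is what
FUNCTION_NAME_P below tests for. */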
5208
5209 int
5210 function_label_operand (op, mode)
5211 rtx op;
5212 enum machine_mode mode;
5213 {
5214 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
5215 }
5216
5217 /* Returns 1 if OP is a function label involved in a simple addition
5218 with a constant. Used to keep certain patterns from matching
5219 during instruction combination. */
5220 int
5221 is_function_label_plus_const (op)
5222 rtx op;
5223 {
5224 /* Strip off any CONST. */
5225 if (GET_CODE (op) == CONST)
5226 op = XEXP (op, 0);
5227
5228 return (GET_CODE (op) == PLUS
5229 && function_label_operand (XEXP (op, 0), Pmode)
5230 && GET_CODE (XEXP (op, 1)) == CONST_INT);
5231 }
5232
5233 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5234 use in fmpyadd instructions. */
5235 int
5236 fmpyaddoperands (operands)
5237 rtx *operands;
5238 {
5239 enum machine_mode mode = GET_MODE (operands[0]);
5240
5241 /* Must be a floating point mode. */
5242 if (mode != SFmode && mode != DFmode)
5243 return 0;
5244
5245 /* All modes must be the same. */
5246 if (! (mode == GET_MODE (operands[1])
5247 && mode == GET_MODE (operands[2])
5248 && mode == GET_MODE (operands[3])
5249 && mode == GET_MODE (operands[4])
5250 && mode == GET_MODE (operands[5])))
5251 return 0;
5252
5253 /* All operands must be registers. */
5254 if (! (GET_CODE (operands[1]) == REG
5255 && GET_CODE (operands[2]) == REG
5256 && GET_CODE (operands[3]) == REG
5257 && GET_CODE (operands[4]) == REG
5258 && GET_CODE (operands[5]) == REG))
5259 return 0;
5260
5261 /* Only 2 real operands to the addition. One of the input operands must
5262 be the same as the output operand. */
5263 if (! rtx_equal_p (operands[3], operands[4])
5264 && ! rtx_equal_p (operands[3], operands[5]))
5265 return 0;
5266
5267 /* The in/out operand of the add cannot conflict with any operand of the multiply. */
5268 if (rtx_equal_p (operands[3], operands[0])
5269 || rtx_equal_p (operands[3], operands[1])
5270 || rtx_equal_p (operands[3], operands[2]))
5271 return 0;
5272
5273 /* The multiply result cannot feed the addition inputs. */
5274 if (rtx_equal_p (operands[4], operands[0])
5275 || rtx_equal_p (operands[5], operands[0]))
5276 return 0;
5277
5278 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
5279 if (mode == SFmode
5280 && (REGNO (operands[0]) < 57
5281 || REGNO (operands[1]) < 57
5282 || REGNO (operands[2]) < 57
5283 || REGNO (operands[3]) < 57
5284 || REGNO (operands[4]) < 57
5285 || REGNO (operands[5]) < 57))
5286 return 0;
5287
5288 /* Passed. Operands are suitable for fmpyadd. */
5289 return 1;
5290 }
5291
5292 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
5293 use in fmpysub instructions. */
5294 int
5295 fmpysuboperands (operands)
5296 rtx *operands;
5297 {
5298 enum machine_mode mode = GET_MODE (operands[0]);
5299
5300 /* Must be a floating point mode. */
5301 if (mode != SFmode && mode != DFmode)
5302 return 0;
5303
5304 /* All modes must be the same. */
5305 if (! (mode == GET_MODE (operands[1])
5306 && mode == GET_MODE (operands[2])
5307 && mode == GET_MODE (operands[3])
5308 && mode == GET_MODE (operands[4])
5309 && mode == GET_MODE (operands[5])))
5310 return 0;
5311
5312 /* All operands must be registers. */
5313 if (! (GET_CODE (operands[1]) == REG
5314 && GET_CODE (operands[2]) == REG
5315 && GET_CODE (operands[3]) == REG
5316 && GET_CODE (operands[4]) == REG
5317 && GET_CODE (operands[5]) == REG))
5318 return 0;
5319
5320 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
5321 operation, so operands[4] must be the same as operands[3]. */
5322 if (! rtx_equal_p (operands[3], operands[4]))
5323 return 0;
5324
5325 /* The multiply result cannot feed the subtraction. */
5326 if (rtx_equal_p (operands[5], operands[0]))
5327 return 0;
5328
5329 /* The in/out operand of the sub cannot conflict with any operand of the multiply. */
5330 if (rtx_equal_p (operands[3], operands[0])
5331 || rtx_equal_p (operands[3], operands[1])
5332 || rtx_equal_p (operands[3], operands[2]))
5333 return 0;
5334
5335 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
5336 if (mode == SFmode
5337 && (REGNO (operands[0]) < 57
5338 || REGNO (operands[1]) < 57
5339 || REGNO (operands[2]) < 57
5340 || REGNO (operands[3]) < 57
5341 || REGNO (operands[4]) < 57
5342 || REGNO (operands[5]) < 57))
5343 return 0;
5344
5345 /* Passed. Operands are suitable for fmpysub. */
5346 return 1;
5347 }
5348
5349 int
5350 plus_xor_ior_operator (op, mode)
5351 rtx op;
5352 enum machine_mode mode;
5353 {
5354 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
5355 || GET_CODE (op) == IOR);
5356 }
5357
5358 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
5359 constants for shadd instructions. */
5360 int
5361 shadd_constant_p (val)
5362 int val;
5363 {
5364 if (val == 2 || val == 4 || val == 8)
5365 return 1;
5366 else
5367 return 0;
5368 }
5369
5370 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
5371 the valid constants for shadd instructions. */
5372 int
5373 shadd_operand (op, mode)
5374 rtx op;
5375 enum machine_mode mode;
5376 {
5377 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
5378 }
5379
5380 /* Return 1 if OP is valid as a base register in a reg + reg address. */
5381
5382 int
5383 basereg_operand (op, mode)
5384 rtx op;
5385 enum machine_mode mode;
5386 {
5387 /* cse will create some unscaled indexed addresses; however, unscaled
5388 indexing generally isn't a win on the PA, so avoid creating unscaled
5389 indexed addresses until after cse is finished. */
5390 if (!cse_not_expected)
5391 return 0;
5392
5393 /* Once reload has started everything is considered valid. Reload should
5394 only create indexed addresses using the stack/frame pointer, and any
5395 others were checked for validity when created by the combine pass.
5396
5397 Also allow any register when TARGET_NO_SPACE_REGS is in effect since
5398 we don't have to worry about the braindamaged implicit space register
5399 selection using the basereg only (rather than effective address)
5400 screwing us over. */
5401 if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed)
5402 return (GET_CODE (op) == REG);
5403
5404 /* Stack is always OK for indexing. */
5405 if (op == stack_pointer_rtx)
5406 return 1;
5407
5408 /* While it's always safe to index off the frame pointer, it's not
5409 always profitable, particularly when the frame pointer is being
5410 eliminated. */
5411 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
5412 return 1;
5413
5414 /* The only other valid OPs are pseudo registers with
5415 REGNO_POINTER_FLAG set. */
5416 if (GET_CODE (op) != REG
5417 || REGNO (op) < FIRST_PSEUDO_REGISTER
5418 || ! register_operand (op, mode))
5419 return 0;
5420
5421 return REGNO_POINTER_FLAG (REGNO (op));
5422 }
5423
5424 /* Return 1 if this operand is anything other than a hard register. */
5425
5426 int
5427 non_hard_reg_operand (op, mode)
5428 rtx op;
5429 enum machine_mode mode;
5430 {
5431 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
5432 }
5433
5434 /* Return 1 if INSN branches forward. Should be using insn_addresses
5435 to avoid walking through all the insns... */
5436 int
5437 forward_branch_p (insn)
5438 rtx insn;
5439 {
5440 rtx label = JUMP_LABEL (insn);
5441
5442 while (insn)
5443 {
5444 if (insn == label)
5445 break;
5446 else
5447 insn = NEXT_INSN (insn);
5448 }
5449
5450 return (insn == label);
5451 }
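
#if 0
/* A minimal sketch of the insn_addresses-based test suggested in the
comment above. It assumes both addresses are valid, which only holds
late in compilation -- hence the conservative walking loop used
instead. */
static int
forward_branch_p_fast (insn)
rtx insn;
{
return (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
> insn_addresses[INSN_UID (insn)]);
}
#endif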
5452
5453 /* Return 1 if OP is an equality comparison, else return 0. */
5454 int
5455 eq_neq_comparison_operator (op, mode)
5456 rtx op;
5457 enum machine_mode mode;
5458 {
5459 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
5460 }
5461
5462 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
5463 int
5464 movb_comparison_operator (op, mode)
5465 rtx op;
5466 enum machine_mode mode;
5467 {
5468 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
5469 || GET_CODE (op) == LT || GET_CODE (op) == GE);
5470 }
5471
5472 /* Return 1 if INSN is in the delay slot of a call instruction. */
5473 int
5474 jump_in_call_delay (insn)
5475 rtx insn;
5476 {
5477
5478 if (GET_CODE (insn) != JUMP_INSN)
5479 return 0;
5480
5481 if (PREV_INSN (insn)
5482 && PREV_INSN (PREV_INSN (insn))
5483 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
5484 {
5485 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
5486
5487 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
5488 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
5489
5490 }
5491 else
5492 return 0;
5493 }
5494
5495 /* Output an unconditional move and branch insn. */
5496
5497 char *
5498 output_parallel_movb (operands, length)
5499 rtx *operands;
5500 int length;
5501 {
5502 /* These are the cases in which we win. */
5503 if (length == 4)
5504 return "mov%I1b,tr %1,%0,%2";
5505
5506 /* None of these cases wins, but they don't lose either. */
5507 if (dbr_sequence_length () == 0)
5508 {
5509 /* Nothing is in the delay slot, so fake it by putting the combined
5510 insn (the copy or add) in the delay slot of a bl. */
5511 if (GET_CODE (operands[1]) == CONST_INT)
5512 return "bl %2,0\n\tldi %1,%0";
5513 else
5514 return "bl %2,0\n\tcopy %1,%0";
5515 }
5516 else
5517 {
5518 /* Something in the delay slot, but we've got a long branch. */
5519 if (GET_CODE (operands[1]) == CONST_INT)
5520 return "ldi %1,%0\n\tbl %2,0";
5521 else
5522 return "copy %1,%0\n\tbl %2,0";
5523 }
5524 }
5525
5526 /* Output an unconditional add and branch insn. */
5527
5528 char *
5529 output_parallel_addb (operands, length)
5530 rtx *operands;
5531 int length;
5532 {
5533 /* To make life easy we want operand0 to be the shared input/output
5534 operand and operand1 to be the readonly operand. */
5535 if (operands[0] == operands[1])
5536 operands[1] = operands[2];
5537
5538 /* These are the cases in which we win. */
5539 if (length == 4)
5540 return "add%I1b,tr %1,%0,%3";
5541
5542 /* None of these cases win, but they don't lose either. */
5543 if (dbr_sequence_length () == 0)
5544 {
5545 /* Nothing is in the delay slot, so fake it by putting the combined
5546 insn (the copy or add) in the delay slot of a bl. */
5547 return "bl %3,0\n\tadd%I1 %1,%0,%0";
5548 }
5549 else
5550 {
5551 /* Something in the delay slot, but we've got a long branch. */
5552 return "add%I1 %1,%0,%0\n\tbl %3,0";
5553 }
5554 }
5555
5556 /* Return nonzero if INSN (a jump insn) immediately follows a call. This
5557 is used to discourage creating parallel movb/addb insns since a jump
5558 which immediately follows a call can execute in the delay slot of the
5559 call. */
5560
5561 int
following_call (insn)
5562 rtx insn;
5563 {
5564 /* Find the previous real insn, skipping NOTEs. */
5565 insn = PREV_INSN (insn);
5566 while (insn && GET_CODE (insn) == NOTE)
5567 insn = PREV_INSN (insn);
5568
5569 /* Check for CALL_INSNs and millicode calls. */
5570 if (insn
5571 && (GET_CODE (insn) == CALL_INSN
5572 || (GET_CODE (insn) == INSN
5573 && GET_CODE (PATTERN (insn)) != SEQUENCE
5574 && GET_CODE (PATTERN (insn)) != USE
5575 && GET_CODE (PATTERN (insn)) != CLOBBER
5576 && get_attr_type (insn) == TYPE_MILLI)))
5577 return 1;
5578
5579 return 0;
5580 }

/* We use this hook to perform a PA specific optimization which is difficult
   to do in earlier passes.

   We want the delay slots of branches within jump tables to be filled.
   None of the compiler passes at the moment even has the notion that a
   PA jump table doesn't contain addresses, but instead contains actual
   instructions!

   Because we actually jump into the table, the addresses of each entry
   must stay constant in relation to the beginning of the table (which
   itself must stay constant relative to the instruction to jump into
   it).  I don't believe we can guarantee earlier passes of the compiler
   will adhere to those rules.

   So, late in the compilation process we find all the jump tables, and
   expand them into real code -- e.g. each entry in the jump table vector
   will get an appropriate label followed by a jump to the final target.

   Reorg and the final jump pass can then optimize these branches and
   fill their delay slots.  We end up with smaller, more efficient code.

   The jump instructions within the table are special; we must be able
   to identify them during assembly output (if the jumps don't get filled
   we need to emit a nop rather than nullifying the delay slot).  We
   identify jumps in switch tables by marking the SET with DImode.  */
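
/* A sketch of the transformation (the labels are invented): an
   ADDR_VEC that would otherwise be emitted as a table of addresses,

        .word L$0101
        .word L$0102

   is replaced by real branch instructions, each preceded by its own
   CODE_LABEL and followed by a BARRIER:

        L$0201: b,n L$0101
        L$0202: b,n L$0102

   Since every entry is now a fixed-size instruction, a computed jump
   into the table still lands on the right entry, and reorg can later
   fill the branches' delay slots.  */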

pa_reorg (insns)
     rtx insns;
{
  rtx insn;

  remove_useless_addtr_insns (insns, 1);

  pa_combine_instructions (get_insns ());

  /* This is fairly cheap, so always run it if optimizing.  */
  if (optimize > 0)
    {
      /* Find and explode all ADDR_VEC insns.  */
      insns = get_insns ();
      for (insn = insns; insn; insn = NEXT_INSN (insn))
        {
          rtx pattern, tmp, location;
          unsigned int length, i;

          /* Find an ADDR_VEC insn to explode.  */
          if (GET_CODE (insn) != JUMP_INSN
              || GET_CODE (PATTERN (insn)) != ADDR_VEC)
            continue;

          /* If needed, emit a marker for the beginning of the branch
             table.  */
          if (TARGET_GAS)
            emit_insn_before (gen_begin_brtab (), insn);

          pattern = PATTERN (insn);
          location = PREV_INSN (insn);
          length = XVECLEN (pattern, 0);

          for (i = 0; i < length; i++)
            {
              /* Emit the jump itself.  */
              tmp = gen_switch_jump (XEXP (XVECEXP (pattern, 0, i), 0));
              tmp = emit_jump_insn_after (tmp, location);
              JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
              LABEL_NUSES (JUMP_LABEL (tmp))++;

              /* Emit a BARRIER after the jump.  */
              location = NEXT_INSN (location);
              emit_barrier_after (location);

              /* Put a CODE_LABEL before each jump so jump.c does not
                 optimize the jumps away.  */
              location = NEXT_INSN (location);
              tmp = gen_label_rtx ();
              LABEL_NUSES (tmp) = 1;
              emit_label_after (tmp, location);
              location = NEXT_INSN (location);
            }

          /* If needed, emit a marker for the end of the branch table.  */
          if (TARGET_GAS)
            emit_insn_before (gen_end_brtab (), location);
          /* Delete the ADDR_VEC.  */
          delete_insn (insn);
        }
    }
  else if (TARGET_GAS)
    {
      /* Still need begin_brtab and end_brtab markers.  */
      insns = get_insns ();
      for (insn = insns; insn; insn = NEXT_INSN (insn))
        {
          /* Find an ADDR_VEC insn.  */
          if (GET_CODE (insn) != JUMP_INSN
              || GET_CODE (PATTERN (insn)) != ADDR_VEC)
            continue;

          /* Now generate markers for the beginning and end of the
             branch table.  */
          emit_insn_before (gen_begin_brtab (), insn);
          emit_insn_after (gen_end_brtab (), insn);
        }
    }
}

/* The PA has a number of odd instructions which can perform multiple
   tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
   it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable on PA2.0 machines because the
   two outputs would take two slots in the reorder buffers.

   This routine finds instructions which can be combined and combines
   them.  We only support some of the potential combinations, and we
   only try common ways to find suitable instructions.

      * addb can add two registers or a register and a small integer
      and jump to a nearby (+-8k) location.  Normally the jump to the
      nearby location is conditional on the result of the add, but by
      using the "true" condition we can make the jump unconditional.
      Thus addb can perform two independent operations in one insn.

      * movb is similar to addb in that it can perform a reg->reg
      or small immediate->reg copy and jump to a nearby (+-8k) location.

      * fmpyadd and fmpysub can perform an FP multiply and either an
      FP add or FP sub if the operands of the multiply and add/sub are
      independent (there are other minor restrictions).  Note that both
      the fmpy and fadd/fsub can in theory move to better spots according
      to data dependencies, but for now we require the fmpy to stay at a
      fixed location.

      * Many of the memory operations can perform pre & post updates
      of index registers.  GCC's pre/post increment/decrement addressing
      is far too simple to take advantage of all the possibilities.  This
      pass may not be suitable since those insns may not be independent.

      * comclr can compare two registers or a register and a small
      integer, nullify the following instruction and zero some other
      register.  This is more difficult to use as it's harder to find
      an insn which will generate a comclr than finding something like
      an unconditional branch.  (conditional moves & long branches
      create comclr insns).

      * Most arithmetic operations can conditionally skip the next
      instruction.  They can be viewed as "perform this operation
      and conditionally jump to this nearby location" (where nearby
      is an insn away).  These are difficult to use due to the
      branch length restrictions.  */
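
/* For example (the registers and label are invented), an unconditional
   backward branch preceded by an independent register copy,

        copy %r4,%r3
        b L$0020

   can become a single movb using the "true" condition:

        movb,tr %r4,%r3,L$0020

   and an independent fmpy/fadd pair can likewise become one fmpyadd.
   The code below searches for such anchor/floater pairs.  */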

pa_combine_instructions (insns)
     rtx insns;
{
  rtx anchor, new;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.  */
  new = gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  new = make_insn_raw (new);

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
         Also ignore any special USE insns.  */
      if ((GET_CODE (anchor) != INSN
           && GET_CODE (anchor) != JUMP_INSN
           && GET_CODE (anchor) != CALL_INSN)
          || GET_CODE (PATTERN (anchor)) == USE
          || GET_CODE (PATTERN (anchor)) == CLOBBER
          || GET_CODE (PATTERN (anchor)) == ADDR_VEC
          || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
        continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
          || anchor_attr == PA_COMBINE_TYPE_FADDSUB
          || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
              && ! forward_branch_p (anchor)))
        {
          rtx floater;

          for (floater = PREV_INSN (anchor);
               floater;
               floater = PREV_INSN (floater))
            {
              if (GET_CODE (floater) == NOTE
                  || (GET_CODE (floater) == INSN
                      && (GET_CODE (PATTERN (floater)) == USE
                          || GET_CODE (PATTERN (floater)) == CLOBBER)))
                continue;

              /* Anything except a regular INSN will stop our search.  */
              if (GET_CODE (floater) != INSN
                  || GET_CODE (PATTERN (floater)) == ADDR_VEC
                  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
                {
                  floater = NULL_RTX;
                  break;
                }

              /* See if FLOATER is suitable for combination with the
                 anchor.  */
              floater_attr = get_attr_pa_combine_type (floater);
              if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                      && floater_attr == PA_COMBINE_TYPE_FMPY))
                {
                  /* If ANCHOR and FLOATER can be combined, then we're
                     done with this pass.  */
                  if (pa_can_combine_p (new, anchor, floater, 0,
                                        SET_DEST (PATTERN (floater)),
                                        XEXP (SET_SRC (PATTERN (floater)), 0),
                                        XEXP (SET_SRC (PATTERN (floater)), 1)))
                    break;
                }
              else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
                       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
                {
                  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
                    {
                      if (pa_can_combine_p (new, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)), 0),
                                            XEXP (SET_SRC (PATTERN (floater)), 1)))
                        break;
                    }
                  else
                    {
                      if (pa_can_combine_p (new, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater))))
                        break;
                    }
                }
            }

          /* If we didn't find anything on the backward scan, try
             forward.  */
          if (!floater
              && (anchor_attr == PA_COMBINE_TYPE_FMPY
                  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
            {
              for (floater = anchor; floater; floater = NEXT_INSN (floater))
                {
                  if (GET_CODE (floater) == NOTE
                      || (GET_CODE (floater) == INSN
                          && (GET_CODE (PATTERN (floater)) == USE
                              || GET_CODE (PATTERN (floater)) == CLOBBER)))
                    continue;

                  /* Anything except a regular INSN will stop our search.  */
                  if (GET_CODE (floater) != INSN
                      || GET_CODE (PATTERN (floater)) == ADDR_VEC
                      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
                    {
                      floater = NULL_RTX;
                      break;
                    }

                  /* See if FLOATER is suitable for combination with the
                     anchor.  */
                  floater_attr = get_attr_pa_combine_type (floater);
                  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                          && floater_attr == PA_COMBINE_TYPE_FMPY))
                    {
                      /* If ANCHOR and FLOATER can be combined, then we're
                         done with this pass.  */
                      if (pa_can_combine_p (new, anchor, floater, 1,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)), 0),
                                            XEXP (SET_SRC (PATTERN (floater)), 1)))
                        break;
                    }
                }
            }

          /* FLOATER will be nonzero if we found a suitable floating
             insn for combination with ANCHOR.  */
          if (floater
              && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                  || anchor_attr == PA_COMBINE_TYPE_FMPY))
            {
              /* Emit the new instruction and delete the old anchor.  */
              emit_insn_before (gen_rtx (PARALLEL, VOIDmode,
                                         gen_rtvec (2, PATTERN (anchor),
                                                    PATTERN (floater))),
                                anchor);
              PUT_CODE (anchor, NOTE);
              NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
              NOTE_SOURCE_FILE (anchor) = 0;

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
              delete_insn (floater);

              continue;
            }
          else if (floater
                   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
            {
              rtx temp;
              /* Emit the new jump instruction and delete the old
                 anchor.  */
              temp = emit_jump_insn_before (gen_rtx (PARALLEL, VOIDmode,
                                                     gen_rtvec (2, PATTERN (anchor),
                                                                PATTERN (floater))),
                                            anchor);
              JUMP_LABEL (temp) = JUMP_LABEL (anchor);
              PUT_CODE (anchor, NOTE);
              NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
              NOTE_SOURCE_FILE (anchor) = 0;

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
              delete_insn (floater);
              continue;
            }
        }
    }
}

int
pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
     rtx new, anchor, floater;
     int reversed;
     rtx dest, src1, src2;
{
  int insn_code_number;
  rtx start, end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met, keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
  INSN_CODE (new) = -1;
  insn_code_number = recog_memoized (new);
  if (insn_code_number < 0
      || !constrain_operands (insn_code_number, 1))
    return 0;

  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There are up to three operands to consider: one
     output and two inputs.

     The output must not be used between FLOATER and ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}
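
/* For instance (the register is invented): when an fadd floater that
   sets %fr5 is pulled down to a later fmpy anchor, no insn strictly
   between the two may read %fr5, and nothing may reassign the fadd's
   inputs in that region; otherwise moving the fadd would change the
   program's behavior.  The three checks above enforce exactly that
   for DEST, SRC1 and SRC2.  */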