1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "real.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "recog.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "reload.h"
42 #include "obstack.h"
43 #include "except.h"
44 #include "function.h"
45 #include "toplev.h"
46 #include "ggc.h"
47 #include "integrate.h"
48 #include "tm_p.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "debug.h"
52 #include "langhooks.h"
53 #include <splay-tree.h>
54 #include "cfglayout.h"
55 #include "tree-gimple.h"
56 #include "tree-flow.h"
57 #include "tree-stdarg.h"
58 #include "tm-constrs.h"
59
60
61 /* Specify which cpu to schedule for. */
62 enum processor_type alpha_tune;
63
64 /* Which cpu we're generating code for. */
65 enum processor_type alpha_cpu;
66
67 static const char * const alpha_cpu_name[] =
68 {
69 "ev4", "ev5", "ev6"
70 };
71
72 /* Specify how accurate floating-point traps need to be. */
73
74 enum alpha_trap_precision alpha_tp;
75
76 /* Specify the floating-point rounding mode. */
77
78 enum alpha_fp_rounding_mode alpha_fprm;
79
80 /* Specify which things cause traps. */
81
82 enum alpha_fp_trap_mode alpha_fptm;
83
84 /* Save information from a "cmpxx" operation until the branch or scc is
85 emitted. */
86
87 struct alpha_compare alpha_compare;
88
89 /* Nonzero if inside of a function, because the Alpha asm can't
90 handle .files inside of functions. */
91
92 static int inside_function = FALSE;
93
94 /* The number of cycles of latency we should assume on memory reads. */
95
96 int alpha_memory_latency = 3;
97
98 /* Whether the function needs the GP. */
99
100 static int alpha_function_needs_gp;
101
102 /* The alias set for prologue/epilogue register save/restore. */
103
104 static GTY(()) int alpha_sr_alias_set;
105
106 /* The assembler name of the current function. */
107
108 static const char *alpha_fnname;
109
110 /* The next explicit relocation sequence number. */
111 extern GTY(()) int alpha_next_sequence_number;
112 int alpha_next_sequence_number = 1;
113
114 /* The literal and gpdisp sequence numbers for this insn, as printed
115 by %# and %* respectively. */
116 extern GTY(()) int alpha_this_literal_sequence_number;
117 extern GTY(()) int alpha_this_gpdisp_sequence_number;
118 int alpha_this_literal_sequence_number;
119 int alpha_this_gpdisp_sequence_number;
120
121 /* Costs of various operations on the different architectures. */
122
123 struct alpha_rtx_cost_data
124 {
125 unsigned char fp_add;
126 unsigned char fp_mult;
127 unsigned char fp_div_sf;
128 unsigned char fp_div_df;
129 unsigned char int_mult_si;
130 unsigned char int_mult_di;
131 unsigned char int_shift;
132 unsigned char int_cmov;
133 unsigned short int_div;
134 };
135
136 static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
137 {
138 { /* EV4 */
139 COSTS_N_INSNS (6), /* fp_add */
140 COSTS_N_INSNS (6), /* fp_mult */
141 COSTS_N_INSNS (34), /* fp_div_sf */
142 COSTS_N_INSNS (63), /* fp_div_df */
143 COSTS_N_INSNS (23), /* int_mult_si */
144 COSTS_N_INSNS (23), /* int_mult_di */
145 COSTS_N_INSNS (2), /* int_shift */
146 COSTS_N_INSNS (2), /* int_cmov */
147 COSTS_N_INSNS (97), /* int_div */
148 },
149 { /* EV5 */
150 COSTS_N_INSNS (4), /* fp_add */
151 COSTS_N_INSNS (4), /* fp_mult */
152 COSTS_N_INSNS (15), /* fp_div_sf */
153 COSTS_N_INSNS (22), /* fp_div_df */
154 COSTS_N_INSNS (8), /* int_mult_si */
155 COSTS_N_INSNS (12), /* int_mult_di */
156 COSTS_N_INSNS (1) + 1, /* int_shift */
157 COSTS_N_INSNS (1), /* int_cmov */
158 COSTS_N_INSNS (83), /* int_div */
159 },
160 { /* EV6 */
161 COSTS_N_INSNS (4), /* fp_add */
162 COSTS_N_INSNS (4), /* fp_mult */
163 COSTS_N_INSNS (12), /* fp_div_sf */
164 COSTS_N_INSNS (15), /* fp_div_df */
165 COSTS_N_INSNS (7), /* int_mult_si */
166 COSTS_N_INSNS (7), /* int_mult_di */
167 COSTS_N_INSNS (1), /* int_shift */
168 COSTS_N_INSNS (2), /* int_cmov */
169 COSTS_N_INSNS (86), /* int_div */
170 },
171 };
172
173 /* Similar but tuned for code size instead of execution latency. The
174 extra +N is fractional cost tuning based on latency. It's used to
175 encourage use of cheaper insns like shift, but only if there's just
176 one of them. */
177
178 static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
179 {
180 COSTS_N_INSNS (1), /* fp_add */
181 COSTS_N_INSNS (1), /* fp_mult */
182 COSTS_N_INSNS (1), /* fp_div_sf */
183 COSTS_N_INSNS (1) + 1, /* fp_div_df */
184 COSTS_N_INSNS (1) + 1, /* int_mult_si */
185 COSTS_N_INSNS (1) + 2, /* int_mult_di */
186 COSTS_N_INSNS (1), /* int_shift */
187 COSTS_N_INSNS (1), /* int_cmov */
188 COSTS_N_INSNS (6), /* int_div */
189 };
190
191 /* Get the number of args of a function in one of two ways. */
192 #if TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK
193 #define NUM_ARGS current_function_args_info.num_args
194 #else
195 #define NUM_ARGS current_function_args_info
196 #endif
197
198 #define REG_PV 27
199 #define REG_RA 26
200
201 /* Declarations of static functions. */
202 static struct machine_function *alpha_init_machine_status (void);
203 static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
204
205 #if TARGET_ABI_OPEN_VMS
206 static void alpha_write_linkage (FILE *, const char *, tree);
207 #endif
208
209 static void unicosmk_output_deferred_case_vectors (FILE *);
210 static void unicosmk_gen_dsib (unsigned long *);
211 static void unicosmk_output_ssib (FILE *, const char *);
212 static int unicosmk_need_dex (rtx);
213 \f
214 /* Implement TARGET_HANDLE_OPTION. */
215
216 static bool
217 alpha_handle_option (size_t code, const char *arg, int value)
218 {
219 switch (code)
220 {
221 case OPT_mfp_regs:
222 if (value == 0)
223 target_flags |= MASK_SOFT_FP;
224 break;
225
226 case OPT_mieee:
227 case OPT_mieee_with_inexact:
228 target_flags |= MASK_IEEE_CONFORMANT;
229 break;
230
231 case OPT_mtls_size_:
232 if (value != 16 && value != 32 && value != 64)
233 error ("bad value %qs for -mtls-size switch", arg);
234 break;
235 }
236
237 return true;
238 }
239
240 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
241 /* Implement TARGET_MANGLE_FUNDAMENTAL_TYPE. */
242
243 static const char *
244 alpha_mangle_fundamental_type (tree type)
245 {
246 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
247 && TARGET_LONG_DOUBLE_128)
248 return "g";
249
250 /* For all other types, use normal C++ mangling. */
251 return NULL;
252 }
253 #endif
254
255 /* Parse target option strings. */
256
257 void
258 override_options (void)
259 {
260 static const struct cpu_table {
261 const char *const name;
262 const enum processor_type processor;
263 const int flags;
264 } cpu_table[] = {
265 { "ev4", PROCESSOR_EV4, 0 },
266 { "ev45", PROCESSOR_EV4, 0 },
267 { "21064", PROCESSOR_EV4, 0 },
268 { "ev5", PROCESSOR_EV5, 0 },
269 { "21164", PROCESSOR_EV5, 0 },
270 { "ev56", PROCESSOR_EV5, MASK_BWX },
271 { "21164a", PROCESSOR_EV5, MASK_BWX },
272 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX },
273 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
274 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
275 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
276 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
277 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX },
278 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX },
279 { 0, 0, 0 }
280 };
281
282 int i;
283
284 /* Unicos/Mk doesn't have shared libraries. */
285 if (TARGET_ABI_UNICOSMK && flag_pic)
286 {
287 warning (0, "-f%s ignored for Unicos/Mk (not supported)",
288 (flag_pic > 1) ? "PIC" : "pic");
289 flag_pic = 0;
290 }
291
 292   /* On Unicos/Mk, the native compiler consistently generates /d suffixes for
293 floating-point instructions. Make that the default for this target. */
294 if (TARGET_ABI_UNICOSMK)
295 alpha_fprm = ALPHA_FPRM_DYN;
296 else
297 alpha_fprm = ALPHA_FPRM_NORM;
298
299 alpha_tp = ALPHA_TP_PROG;
300 alpha_fptm = ALPHA_FPTM_N;
301
302 /* We cannot use su and sui qualifiers for conversion instructions on
303 Unicos/Mk. I'm not sure if this is due to assembler or hardware
304 limitations. Right now, we issue a warning if -mieee is specified
305 and then ignore it; eventually, we should either get it right or
306 disable the option altogether. */
307
308 if (TARGET_IEEE)
309 {
310 if (TARGET_ABI_UNICOSMK)
311 warning (0, "-mieee not supported on Unicos/Mk");
312 else
313 {
314 alpha_tp = ALPHA_TP_INSN;
315 alpha_fptm = ALPHA_FPTM_SU;
316 }
317 }
318
319 if (TARGET_IEEE_WITH_INEXACT)
320 {
321 if (TARGET_ABI_UNICOSMK)
322 warning (0, "-mieee-with-inexact not supported on Unicos/Mk");
323 else
324 {
325 alpha_tp = ALPHA_TP_INSN;
326 alpha_fptm = ALPHA_FPTM_SUI;
327 }
328 }
329
330 if (alpha_tp_string)
331 {
332 if (! strcmp (alpha_tp_string, "p"))
333 alpha_tp = ALPHA_TP_PROG;
334 else if (! strcmp (alpha_tp_string, "f"))
335 alpha_tp = ALPHA_TP_FUNC;
336 else if (! strcmp (alpha_tp_string, "i"))
337 alpha_tp = ALPHA_TP_INSN;
338 else
339 error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
340 }
341
342 if (alpha_fprm_string)
343 {
344 if (! strcmp (alpha_fprm_string, "n"))
345 alpha_fprm = ALPHA_FPRM_NORM;
346 else if (! strcmp (alpha_fprm_string, "m"))
347 alpha_fprm = ALPHA_FPRM_MINF;
348 else if (! strcmp (alpha_fprm_string, "c"))
349 alpha_fprm = ALPHA_FPRM_CHOP;
350 else if (! strcmp (alpha_fprm_string,"d"))
351 alpha_fprm = ALPHA_FPRM_DYN;
352 else
353 error ("bad value %qs for -mfp-rounding-mode switch",
354 alpha_fprm_string);
355 }
356
357 if (alpha_fptm_string)
358 {
359 if (strcmp (alpha_fptm_string, "n") == 0)
360 alpha_fptm = ALPHA_FPTM_N;
361 else if (strcmp (alpha_fptm_string, "u") == 0)
362 alpha_fptm = ALPHA_FPTM_U;
363 else if (strcmp (alpha_fptm_string, "su") == 0)
364 alpha_fptm = ALPHA_FPTM_SU;
365 else if (strcmp (alpha_fptm_string, "sui") == 0)
366 alpha_fptm = ALPHA_FPTM_SUI;
367 else
368 error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
369 }
370
371 if (alpha_cpu_string)
372 {
373 for (i = 0; cpu_table [i].name; i++)
374 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
375 {
376 alpha_tune = alpha_cpu = cpu_table [i].processor;
377 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
378 target_flags |= cpu_table [i].flags;
379 break;
380 }
381 if (! cpu_table [i].name)
382 error ("bad value %qs for -mcpu switch", alpha_cpu_string);
383 }
384
385 if (alpha_tune_string)
386 {
387 for (i = 0; cpu_table [i].name; i++)
388 if (! strcmp (alpha_tune_string, cpu_table [i].name))
389 {
390 alpha_tune = cpu_table [i].processor;
391 break;
392 }
393 if (! cpu_table [i].name)
394 error ("bad value %qs for -mcpu switch", alpha_tune_string);
395 }
396
397 /* Do some sanity checks on the above options. */
398
399 if (TARGET_ABI_UNICOSMK && alpha_fptm != ALPHA_FPTM_N)
400 {
401 warning (0, "trap mode not supported on Unicos/Mk");
402 alpha_fptm = ALPHA_FPTM_N;
403 }
404
405 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
406 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
407 {
408 warning (0, "fp software completion requires -mtrap-precision=i");
409 alpha_tp = ALPHA_TP_INSN;
410 }
411
412 if (alpha_cpu == PROCESSOR_EV6)
413 {
414 /* Except for EV6 pass 1 (not released), we always have precise
 415          arithmetic traps, which means we can do software completion
416 without minding trap shadows. */
417 alpha_tp = ALPHA_TP_PROG;
418 }
419
420 if (TARGET_FLOAT_VAX)
421 {
422 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
423 {
424 warning (0, "rounding mode not supported for VAX floats");
425 alpha_fprm = ALPHA_FPRM_NORM;
426 }
427 if (alpha_fptm == ALPHA_FPTM_SUI)
428 {
429 warning (0, "trap mode not supported for VAX floats");
430 alpha_fptm = ALPHA_FPTM_SU;
431 }
432 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
433 warning (0, "128-bit long double not supported for VAX floats");
434 target_flags &= ~MASK_LONG_DOUBLE_128;
435 }
436
437 {
438 char *end;
439 int lat;
440
441 if (!alpha_mlat_string)
442 alpha_mlat_string = "L1";
443
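    /* -mmemory-latency accepts a plain cycle count, "Ln" for the latency of
       cache level n (1-3) on the CPU being tuned for, or "main" for main
       memory.  */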
444 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
445 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
446 ;
447 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
448 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
449 && alpha_mlat_string[2] == '\0')
450 {
451 static int const cache_latency[][4] =
452 {
453 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
454 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
455 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
456 };
457
458 lat = alpha_mlat_string[1] - '0';
459 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
460 {
461 warning (0, "L%d cache latency unknown for %s",
462 lat, alpha_cpu_name[alpha_tune]);
463 lat = 3;
464 }
465 else
466 lat = cache_latency[alpha_tune][lat-1];
467 }
468 else if (! strcmp (alpha_mlat_string, "main"))
469 {
470 /* Most current memories have about 370ns latency. This is
471 a reasonable guess for a fast cpu. */
472 lat = 150;
473 }
474 else
475 {
476 warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
477 lat = 3;
478 }
479
480 alpha_memory_latency = lat;
481 }
482
483 /* Default the definition of "small data" to 8 bytes. */
484 if (!g_switch_set)
485 g_switch_value = 8;
486
487 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
488 if (flag_pic == 1)
489 target_flags |= MASK_SMALL_DATA;
490 else if (flag_pic == 2)
491 target_flags &= ~MASK_SMALL_DATA;
492
493 /* Align labels and loops for optimal branching. */
494 /* ??? Kludge these by not doing anything if we don't optimize and also if
495 we are writing ECOFF symbols to work around a bug in DEC's assembler. */
496 if (optimize > 0 && write_symbols != SDB_DEBUG)
497 {
498 if (align_loops <= 0)
499 align_loops = 16;
500 if (align_jumps <= 0)
501 align_jumps = 16;
502 }
503 if (align_functions <= 0)
504 align_functions = 16;
505
506 /* Acquire a unique set number for our register saves and restores. */
507 alpha_sr_alias_set = new_alias_set ();
508
509 /* Register variables and functions with the garbage collector. */
510
511 /* Set up function hooks. */
512 init_machine_status = alpha_init_machine_status;
513
514 /* Tell the compiler when we're using VAX floating point. */
515 if (TARGET_FLOAT_VAX)
516 {
517 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
518 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
519 REAL_MODE_FORMAT (TFmode) = NULL;
520 }
521
522 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
523 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
524 target_flags |= MASK_LONG_DOUBLE_128;
525 #endif
526 }
527 \f
528 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
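 /* For example, 0xffff00000000ff00 qualifies (every byte is 0x00 or 0xff),
    while 0x00000000ffff00f0 does not (its low byte is 0xf0).  */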
529
530 int
531 zap_mask (HOST_WIDE_INT value)
532 {
533 int i;
534
535 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
536 i++, value >>= 8)
537 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
538 return 0;
539
540 return 1;
541 }
542
543 /* Return true if OP is valid for a particular TLS relocation.
544 We are already guaranteed that OP is a CONST. */
545
546 int
547 tls_symbolic_operand_1 (rtx op, int size, int unspec)
548 {
549 op = XEXP (op, 0);
550
551 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
552 return 0;
553 op = XVECEXP (op, 0, 0);
554
555 if (GET_CODE (op) != SYMBOL_REF)
556 return 0;
557
558 switch (SYMBOL_REF_TLS_MODEL (op))
559 {
560 case TLS_MODEL_LOCAL_DYNAMIC:
561 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
562 case TLS_MODEL_INITIAL_EXEC:
563 return unspec == UNSPEC_TPREL && size == 64;
564 case TLS_MODEL_LOCAL_EXEC:
565 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
566 default:
567 gcc_unreachable ();
568 }
569 }
570
571 /* Used by aligned_memory_operand and unaligned_memory_operand to
572 resolve what reload is going to do with OP if it's a register. */
573
574 rtx
575 resolve_reload_operand (rtx op)
576 {
577 if (reload_in_progress)
578 {
579 rtx tmp = op;
580 if (GET_CODE (tmp) == SUBREG)
581 tmp = SUBREG_REG (tmp);
582 if (GET_CODE (tmp) == REG
583 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
584 {
585 op = reg_equiv_memory_loc[REGNO (tmp)];
586 if (op == 0)
587 return 0;
588 }
589 }
590 return op;
591 }
592
 593 /* The scalar modes supported differ from the default check-what-c-supports
594 version in that sometimes TFmode is available even when long double
595 indicates only DFmode. On unicosmk, we have the situation that HImode
596 doesn't map to any C type, but of course we still support that. */
597
598 static bool
599 alpha_scalar_mode_supported_p (enum machine_mode mode)
600 {
601 switch (mode)
602 {
603 case QImode:
604 case HImode:
605 case SImode:
606 case DImode:
607 case TImode: /* via optabs.c */
608 return true;
609
610 case SFmode:
611 case DFmode:
612 return true;
613
614 case TFmode:
615 return TARGET_HAS_XFLOATING_LIBS;
616
617 default:
618 return false;
619 }
620 }
621
622 /* Alpha implements a couple of integer vector mode operations when
623 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
624 which allows the vectorizer to operate on e.g. move instructions,
625 or when expand_vector_operations can do something useful. */
626
627 static bool
628 alpha_vector_mode_supported_p (enum machine_mode mode)
629 {
630 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
631 }
632
633 /* Return 1 if this function can directly return via $26. */
634
635 int
636 direct_return (void)
637 {
638 return (! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK
639 && reload_completed
640 && alpha_sa_size () == 0
641 && get_frame_size () == 0
642 && current_function_outgoing_args_size == 0
643 && current_function_pretend_args_size == 0);
644 }
645
646 /* Return the ADDR_VEC associated with a tablejump insn. */
647
648 rtx
649 alpha_tablejump_addr_vec (rtx insn)
650 {
651 rtx tmp;
652
653 tmp = JUMP_LABEL (insn);
654 if (!tmp)
655 return NULL_RTX;
656 tmp = NEXT_INSN (tmp);
657 if (!tmp)
658 return NULL_RTX;
659 if (GET_CODE (tmp) == JUMP_INSN
660 && GET_CODE (PATTERN (tmp)) == ADDR_DIFF_VEC)
661 return PATTERN (tmp);
662 return NULL_RTX;
663 }
664
665 /* Return the label of the predicted edge, or CONST0_RTX if we don't know. */
666
667 rtx
668 alpha_tablejump_best_label (rtx insn)
669 {
670 rtx jump_table = alpha_tablejump_addr_vec (insn);
671 rtx best_label = NULL_RTX;
672
673 /* ??? Once the CFG doesn't keep getting completely rebuilt, look
674 there for edge frequency counts from profile data. */
675
676 if (jump_table)
677 {
678 int n_labels = XVECLEN (jump_table, 1);
679 int best_count = -1;
680 int i, j;
681
682 for (i = 0; i < n_labels; i++)
683 {
684 int count = 1;
685
686 for (j = i + 1; j < n_labels; j++)
687 if (XEXP (XVECEXP (jump_table, 1, i), 0)
688 == XEXP (XVECEXP (jump_table, 1, j), 0))
689 count++;
690
691 if (count > best_count)
692 best_count = count, best_label = XVECEXP (jump_table, 1, i);
693 }
694 }
695
696 return best_label ? best_label : const0_rtx;
697 }
698
699 /* Return the TLS model to use for SYMBOL. */
700
701 static enum tls_model
702 tls_symbolic_operand_type (rtx symbol)
703 {
704 enum tls_model model;
705
706 if (GET_CODE (symbol) != SYMBOL_REF)
707 return 0;
708 model = SYMBOL_REF_TLS_MODEL (symbol);
709
710 /* Local-exec with a 64-bit size is the same code as initial-exec. */
711 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
712 model = TLS_MODEL_INITIAL_EXEC;
713
714 return model;
715 }
716 \f
717 /* Return true if the function DECL will share the same GP as any
718 function in the current unit of translation. */
719
720 static bool
721 decl_has_samegp (tree decl)
722 {
723 /* Functions that are not local can be overridden, and thus may
724 not share the same gp. */
725 if (!(*targetm.binds_local_p) (decl))
726 return false;
727
728 /* If -msmall-data is in effect, assume that there is only one GP
729 for the module, and so any local symbol has this property. We
730 need explicit relocations to be able to enforce this for symbols
731 not defined in this unit of translation, however. */
732 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
733 return true;
734
735 /* Functions that are not external are defined in this UoT. */
736 /* ??? Irritatingly, static functions not yet emitted are still
737 marked "external". Apply this to non-static functions only. */
738 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
739 }
740
741 /* Return true if EXP should be placed in the small data section. */
742
743 static bool
744 alpha_in_small_data_p (tree exp)
745 {
746 /* We want to merge strings, so we never consider them small data. */
747 if (TREE_CODE (exp) == STRING_CST)
748 return false;
749
750 /* Functions are never in the small data area. Duh. */
751 if (TREE_CODE (exp) == FUNCTION_DECL)
752 return false;
753
754 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
755 {
756 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
757 if (strcmp (section, ".sdata") == 0
758 || strcmp (section, ".sbss") == 0)
759 return true;
760 }
761 else
762 {
763 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
764
765 /* If this is an incomplete type with size 0, then we can't put it
766 in sdata because it might be too big when completed. */
767 if (size > 0 && (unsigned HOST_WIDE_INT) size <= g_switch_value)
768 return true;
769 }
770
771 return false;
772 }
773
774 #if TARGET_ABI_OPEN_VMS
775 static bool
776 alpha_linkage_symbol_p (const char *symname)
777 {
778 int symlen = strlen (symname);
779
780 if (symlen > 4)
781 return strcmp (&symname [symlen - 4], "..lk") == 0;
782
783 return false;
784 }
785
786 #define LINKAGE_SYMBOL_REF_P(X) \
787 ((GET_CODE (X) == SYMBOL_REF \
788 && alpha_linkage_symbol_p (XSTR (X, 0))) \
789 || (GET_CODE (X) == CONST \
790 && GET_CODE (XEXP (X, 0)) == PLUS \
791 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
792 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
793 #endif
794
795 /* legitimate_address_p recognizes an RTL expression that is a valid
796 memory address for an instruction. The MODE argument is the
797 machine mode for the MEM expression that wants to use this address.
798
799 For Alpha, we have either a constant address or the sum of a
800 register and a constant address, or just a register. For DImode,
 801    any of those forms can be surrounded with an AND that clears the
802 low-order three bits; this is an "unaligned" access. */
803
804 bool
805 alpha_legitimate_address_p (enum machine_mode mode, rtx x, int strict)
806 {
807 /* If this is an ldq_u type address, discard the outer AND. */
808 if (mode == DImode
809 && GET_CODE (x) == AND
810 && GET_CODE (XEXP (x, 1)) == CONST_INT
811 && INTVAL (XEXP (x, 1)) == -8)
812 x = XEXP (x, 0);
813
814 /* Discard non-paradoxical subregs. */
815 if (GET_CODE (x) == SUBREG
816 && (GET_MODE_SIZE (GET_MODE (x))
817 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
818 x = SUBREG_REG (x);
819
820 /* Unadorned general registers are valid. */
821 if (REG_P (x)
822 && (strict
823 ? STRICT_REG_OK_FOR_BASE_P (x)
824 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
825 return true;
826
827 /* Constant addresses (i.e. +/- 32k) are valid. */
828 if (CONSTANT_ADDRESS_P (x))
829 return true;
830
831 #if TARGET_ABI_OPEN_VMS
832 if (LINKAGE_SYMBOL_REF_P (x))
833 return true;
834 #endif
835
836 /* Register plus a small constant offset is valid. */
837 if (GET_CODE (x) == PLUS)
838 {
839 rtx ofs = XEXP (x, 1);
840 x = XEXP (x, 0);
841
842 /* Discard non-paradoxical subregs. */
843 if (GET_CODE (x) == SUBREG
844 && (GET_MODE_SIZE (GET_MODE (x))
845 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
846 x = SUBREG_REG (x);
847
848 if (REG_P (x))
849 {
850 if (! strict
851 && NONSTRICT_REG_OK_FP_BASE_P (x)
852 && GET_CODE (ofs) == CONST_INT)
853 return true;
854 if ((strict
855 ? STRICT_REG_OK_FOR_BASE_P (x)
856 : NONSTRICT_REG_OK_FOR_BASE_P (x))
857 && CONSTANT_ADDRESS_P (ofs))
858 return true;
859 }
860 }
861
862 /* If we're managing explicit relocations, LO_SUM is valid, as
863 are small data symbols. */
864 else if (TARGET_EXPLICIT_RELOCS)
865 {
866 if (small_symbolic_operand (x, Pmode))
867 return true;
868
869 if (GET_CODE (x) == LO_SUM)
870 {
871 rtx ofs = XEXP (x, 1);
872 x = XEXP (x, 0);
873
874 /* Discard non-paradoxical subregs. */
875 if (GET_CODE (x) == SUBREG
876 && (GET_MODE_SIZE (GET_MODE (x))
877 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
878 x = SUBREG_REG (x);
879
880 /* Must have a valid base register. */
881 if (! (REG_P (x)
882 && (strict
883 ? STRICT_REG_OK_FOR_BASE_P (x)
884 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
885 return false;
886
887 /* The symbol must be local. */
888 if (local_symbolic_operand (ofs, Pmode)
889 || dtp32_symbolic_operand (ofs, Pmode)
890 || tp32_symbolic_operand (ofs, Pmode))
891 return true;
892 }
893 }
894
895 return false;
896 }
897
898 /* Build the SYMBOL_REF for __tls_get_addr. */
899
900 static GTY(()) rtx tls_get_addr_libfunc;
901
902 static rtx
903 get_tls_get_addr (void)
904 {
905 if (!tls_get_addr_libfunc)
906 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
907 return tls_get_addr_libfunc;
908 }
909
910 /* Try machine-dependent ways of modifying an illegitimate address
911 to be legitimate. If we find one, return the new, valid address. */
912
913 rtx
914 alpha_legitimize_address (rtx x, rtx scratch,
915 enum machine_mode mode ATTRIBUTE_UNUSED)
916 {
917 HOST_WIDE_INT addend;
918
919 /* If the address is (plus reg const_int) and the CONST_INT is not a
920 valid offset, compute the high part of the constant and add it to
921 the register. Then our address is (plus temp low-part-const). */
922 if (GET_CODE (x) == PLUS
923 && GET_CODE (XEXP (x, 0)) == REG
924 && GET_CODE (XEXP (x, 1)) == CONST_INT
925 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
926 {
927 addend = INTVAL (XEXP (x, 1));
928 x = XEXP (x, 0);
929 goto split_addend;
930 }
931
932 /* If the address is (const (plus FOO const_int)), find the low-order
933 part of the CONST_INT. Then load FOO plus any high-order part of the
934 CONST_INT into a register. Our address is (plus reg low-part-const).
935 This is done to reduce the number of GOT entries. */
936 if (!no_new_pseudos
937 && GET_CODE (x) == CONST
938 && GET_CODE (XEXP (x, 0)) == PLUS
939 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
940 {
941 addend = INTVAL (XEXP (XEXP (x, 0), 1));
942 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
943 goto split_addend;
944 }
945
946 /* If we have a (plus reg const), emit the load as in (2), then add
947 the two registers, and finally generate (plus reg low-part-const) as
948 our address. */
949 if (!no_new_pseudos
950 && GET_CODE (x) == PLUS
951 && GET_CODE (XEXP (x, 0)) == REG
952 && GET_CODE (XEXP (x, 1)) == CONST
953 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
954 && GET_CODE (XEXP (XEXP (XEXP (x, 1), 0), 1)) == CONST_INT)
955 {
956 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
957 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
958 XEXP (XEXP (XEXP (x, 1), 0), 0),
959 NULL_RTX, 1, OPTAB_LIB_WIDEN);
960 goto split_addend;
961 }
962
963 /* If this is a local symbol, split the address into HIGH/LO_SUM parts. */
964 if (TARGET_EXPLICIT_RELOCS && symbolic_operand (x, Pmode))
965 {
966 rtx r0, r16, eqv, tga, tp, insn, dest, seq;
967
968 switch (tls_symbolic_operand_type (x))
969 {
970 case TLS_MODEL_NONE:
971 break;
972
973 case TLS_MODEL_GLOBAL_DYNAMIC:
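	  /* Roughly: lda $16,X($29) !tlsgd!N, then a call to __tls_get_addr
	     tagged !lituse_tlsgd!N; the result comes back in $0 and is tied
	     back to X via emit_libcall_block below.  */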
974 start_sequence ();
975
976 r0 = gen_rtx_REG (Pmode, 0);
977 r16 = gen_rtx_REG (Pmode, 16);
978 tga = get_tls_get_addr ();
979 dest = gen_reg_rtx (Pmode);
980 seq = GEN_INT (alpha_next_sequence_number++);
981
982 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
983 insn = gen_call_value_osf_tlsgd (r0, tga, seq);
984 insn = emit_call_insn (insn);
985 CONST_OR_PURE_CALL_P (insn) = 1;
986 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
987
988 insn = get_insns ();
989 end_sequence ();
990
991 emit_libcall_block (insn, dest, r0, x);
992 return dest;
993
994 case TLS_MODEL_LOCAL_DYNAMIC:
995 start_sequence ();
996
997 r0 = gen_rtx_REG (Pmode, 0);
998 r16 = gen_rtx_REG (Pmode, 16);
999 tga = get_tls_get_addr ();
1000 scratch = gen_reg_rtx (Pmode);
1001 seq = GEN_INT (alpha_next_sequence_number++);
1002
1003 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1004 insn = gen_call_value_osf_tlsldm (r0, tga, seq);
1005 insn = emit_call_insn (insn);
1006 CONST_OR_PURE_CALL_P (insn) = 1;
1007 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1008
1009 insn = get_insns ();
1010 end_sequence ();
1011
1012 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1013 UNSPEC_TLSLDM_CALL);
1014 emit_libcall_block (insn, scratch, r0, eqv);
1015
1016 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1017 eqv = gen_rtx_CONST (Pmode, eqv);
1018
1019 if (alpha_tls_size == 64)
1020 {
1021 dest = gen_reg_rtx (Pmode);
1022 emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
1023 emit_insn (gen_adddi3 (dest, dest, scratch));
1024 return dest;
1025 }
1026 if (alpha_tls_size == 32)
1027 {
1028 insn = gen_rtx_HIGH (Pmode, eqv);
1029 insn = gen_rtx_PLUS (Pmode, scratch, insn);
1030 scratch = gen_reg_rtx (Pmode);
1031 emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
1032 }
1033 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1034
1035 case TLS_MODEL_INITIAL_EXEC:
1036 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1037 eqv = gen_rtx_CONST (Pmode, eqv);
1038 tp = gen_reg_rtx (Pmode);
1039 scratch = gen_reg_rtx (Pmode);
1040 dest = gen_reg_rtx (Pmode);
1041
1042 emit_insn (gen_load_tp (tp));
1043 emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
1044 emit_insn (gen_adddi3 (dest, tp, scratch));
1045 return dest;
1046
1047 case TLS_MODEL_LOCAL_EXEC:
1048 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1049 eqv = gen_rtx_CONST (Pmode, eqv);
1050 tp = gen_reg_rtx (Pmode);
1051
1052 emit_insn (gen_load_tp (tp));
1053 if (alpha_tls_size == 32)
1054 {
1055 insn = gen_rtx_HIGH (Pmode, eqv);
1056 insn = gen_rtx_PLUS (Pmode, tp, insn);
1057 tp = gen_reg_rtx (Pmode);
1058 emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
1059 }
1060 return gen_rtx_LO_SUM (Pmode, tp, eqv);
1061
1062 default:
1063 gcc_unreachable ();
1064 }
1065
1066 if (local_symbolic_operand (x, Pmode))
1067 {
1068 if (small_symbolic_operand (x, Pmode))
1069 return x;
1070 else
1071 {
1072 if (!no_new_pseudos)
1073 scratch = gen_reg_rtx (Pmode);
1074 emit_insn (gen_rtx_SET (VOIDmode, scratch,
1075 gen_rtx_HIGH (Pmode, x)));
1076 return gen_rtx_LO_SUM (Pmode, scratch, x);
1077 }
1078 }
1079 }
1080
1081 return NULL;
1082
1083 split_addend:
1084 {
1085 HOST_WIDE_INT low, high;
1086
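    /* ((X & 0xffff) ^ 0x8000) - 0x8000 sign-extends the low 16 bits of X;
       the 32-bit form below does likewise.  The original addend is thus
       rebuilt as leftover + high + low: LOW is folded into the final
       displacement, HIGH is added as an ldah-sized constant, and any
       remaining leftover is added separately.  */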
1087 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1088 addend -= low;
1089 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1090 addend -= high;
1091
1092 if (addend)
1093 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1094 (no_new_pseudos ? scratch : NULL_RTX),
1095 1, OPTAB_LIB_WIDEN);
1096 if (high)
1097 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1098 (no_new_pseudos ? scratch : NULL_RTX),
1099 1, OPTAB_LIB_WIDEN);
1100
1101 return plus_constant (x, low);
1102 }
1103 }
1104
1105 /* Primarily this is required for TLS symbols, but given that our move
1106 patterns *ought* to be able to handle any symbol at any time, we
1107 should never be spilling symbolic operands to the constant pool, ever. */
1108
1109 static bool
1110 alpha_cannot_force_const_mem (rtx x)
1111 {
1112 enum rtx_code code = GET_CODE (x);
1113 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1114 }
1115
1116 /* We do not allow indirect calls to be optimized into sibling calls, nor
1117 can we allow a call to a function with a different GP to be optimized
1118 into a sibcall. */
1119
1120 static bool
1121 alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1122 {
1123 /* Can't do indirect tail calls, since we don't know if the target
1124 uses the same GP. */
1125 if (!decl)
1126 return false;
1127
1128 /* Otherwise, we can make a tail call if the target function shares
1129 the same GP. */
1130 return decl_has_samegp (decl);
1131 }
1132
1133 int
1134 some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED)
1135 {
1136 rtx x = *px;
1137
1138 /* Don't re-split. */
1139 if (GET_CODE (x) == LO_SUM)
1140 return -1;
1141
1142 return small_symbolic_operand (x, Pmode) != 0;
1143 }
1144
1145 static int
1146 split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
1147 {
1148 rtx x = *px;
1149
1150 /* Don't re-split. */
1151 if (GET_CODE (x) == LO_SUM)
1152 return -1;
1153
1154 if (small_symbolic_operand (x, Pmode))
1155 {
1156 x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1157 *px = x;
1158 return -1;
1159 }
1160
1161 return 0;
1162 }
1163
1164 rtx
1165 split_small_symbolic_operand (rtx x)
1166 {
1167 x = copy_insn (x);
1168 for_each_rtx (&x, split_small_symbolic_operand_1, NULL);
1169 return x;
1170 }
1171
1172 /* Indicate that INSN cannot be duplicated. This is true for any insn
1173 that we've marked with gpdisp relocs, since those have to stay in
1174 1-1 correspondence with one another.
1175
1176 Technically we could copy them if we could set up a mapping from one
1177 sequence number to another, across the set of insns to be duplicated.
1178 This seems overly complicated and error-prone since interblock motion
1179 from sched-ebb could move one of the pair of insns to a different block.
1180
1181 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1182 then they'll be in a different block from their ldgp. Which could lead
1183 the bb reorder code to think that it would be ok to copy just the block
1184 containing the call and branch to the block containing the ldgp. */
1185
1186 static bool
1187 alpha_cannot_copy_insn_p (rtx insn)
1188 {
1189 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1190 return false;
1191 if (recog_memoized (insn) >= 0)
1192 return get_attr_cannot_copy (insn);
1193 else
1194 return false;
1195 }
1196
1197
1198 /* Try a machine-dependent way of reloading an illegitimate address
1199 operand. If we find one, push the reload and return the new rtx. */
1200
1201 rtx
1202 alpha_legitimize_reload_address (rtx x,
1203 enum machine_mode mode ATTRIBUTE_UNUSED,
1204 int opnum, int type,
1205 int ind_levels ATTRIBUTE_UNUSED)
1206 {
1207 /* We must recognize output that we have already generated ourselves. */
1208 if (GET_CODE (x) == PLUS
1209 && GET_CODE (XEXP (x, 0)) == PLUS
1210 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
1211 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1212 && GET_CODE (XEXP (x, 1)) == CONST_INT)
1213 {
1214 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1215 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1216 opnum, type);
1217 return x;
1218 }
1219
1220 /* We wish to handle large displacements off a base register by
1221 splitting the addend across an ldah and the mem insn. This
 1222    cuts the number of extra insns needed from 3 to 1.  */
1223 if (GET_CODE (x) == PLUS
1224 && GET_CODE (XEXP (x, 0)) == REG
1225 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1226 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1227 && GET_CODE (XEXP (x, 1)) == CONST_INT)
1228 {
1229 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1230 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1231 HOST_WIDE_INT high
1232 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
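      /* For example, VAL == 0x9000 splits into HIGH == 0x10000 (an ldah of 1)
	 and LOW == -0x7000; both pieces fit their signed 16-bit fields and
	 HIGH + LOW reconstructs the original displacement.  */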
1233
1234 /* Check for 32-bit overflow. */
1235 if (high + low != val)
1236 return NULL_RTX;
1237
1238 /* Reload the high part into a base reg; leave the low part
1239 in the mem directly. */
1240 x = gen_rtx_PLUS (GET_MODE (x),
1241 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1242 GEN_INT (high)),
1243 GEN_INT (low));
1244
1245 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1246 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1247 opnum, type);
1248 return x;
1249 }
1250
1251 return NULL_RTX;
1252 }
1253 \f
1254 /* Compute a (partial) cost for rtx X. Return true if the complete
1255 cost has been computed, and false if subexpressions should be
1256 scanned. In either case, *TOTAL contains the cost result. */
1257
1258 static bool
1259 alpha_rtx_costs (rtx x, int code, int outer_code, int *total)
1260 {
1261 enum machine_mode mode = GET_MODE (x);
1262 bool float_mode_p = FLOAT_MODE_P (mode);
1263 const struct alpha_rtx_cost_data *cost_data;
1264
1265 if (optimize_size)
1266 cost_data = &alpha_rtx_cost_size;
1267 else
1268 cost_data = &alpha_rtx_cost_data[alpha_tune];
1269
1270 switch (code)
1271 {
1272 case CONST_INT:
1273 /* If this is an 8-bit constant, return zero since it can be used
1274 nearly anywhere with no cost. If it is a valid operand for an
1275 ADD or AND, likewise return 0 if we know it will be used in that
1276 context. Otherwise, return 2 since it might be used there later.
1277 All other constants take at least two insns. */
1278 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1279 {
1280 *total = 0;
1281 return true;
1282 }
1283 /* FALLTHRU */
1284
1285 case CONST_DOUBLE:
1286 if (x == CONST0_RTX (mode))
1287 *total = 0;
1288 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1289 || (outer_code == AND && and_operand (x, VOIDmode)))
1290 *total = 0;
1291 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1292 *total = 2;
1293 else
1294 *total = COSTS_N_INSNS (2);
1295 return true;
1296
1297 case CONST:
1298 case SYMBOL_REF:
1299 case LABEL_REF:
1300 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1301 *total = COSTS_N_INSNS (outer_code != MEM);
1302 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1303 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1304 else if (tls_symbolic_operand_type (x))
1305 /* Estimate of cost for call_pal rduniq. */
1306 /* ??? How many insns do we emit here? More than one... */
1307 *total = COSTS_N_INSNS (15);
1308 else
1309 /* Otherwise we do a load from the GOT. */
1310 *total = COSTS_N_INSNS (optimize_size ? 1 : alpha_memory_latency);
1311 return true;
1312
1313 case HIGH:
1314 /* This is effectively an add_operand. */
1315 *total = 2;
1316 return true;
1317
1318 case PLUS:
1319 case MINUS:
1320 if (float_mode_p)
1321 *total = cost_data->fp_add;
1322 else if (GET_CODE (XEXP (x, 0)) == MULT
1323 && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1324 {
1325 *total = (rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
1326 + rtx_cost (XEXP (x, 1), outer_code) + COSTS_N_INSNS (1));
1327 return true;
1328 }
1329 return false;
1330
1331 case MULT:
1332 if (float_mode_p)
1333 *total = cost_data->fp_mult;
1334 else if (mode == DImode)
1335 *total = cost_data->int_mult_di;
1336 else
1337 *total = cost_data->int_mult_si;
1338 return false;
1339
1340 case ASHIFT:
1341 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1342 && INTVAL (XEXP (x, 1)) <= 3)
1343 {
1344 *total = COSTS_N_INSNS (1);
1345 return false;
1346 }
1347 /* FALLTHRU */
1348
1349 case ASHIFTRT:
1350 case LSHIFTRT:
1351 *total = cost_data->int_shift;
1352 return false;
1353
1354 case IF_THEN_ELSE:
1355 if (float_mode_p)
1356 *total = cost_data->fp_add;
1357 else
1358 *total = cost_data->int_cmov;
1359 return false;
1360
1361 case DIV:
1362 case UDIV:
1363 case MOD:
1364 case UMOD:
1365 if (!float_mode_p)
1366 *total = cost_data->int_div;
1367 else if (mode == SFmode)
1368 *total = cost_data->fp_div_sf;
1369 else
1370 *total = cost_data->fp_div_df;
1371 return false;
1372
1373 case MEM:
1374 *total = COSTS_N_INSNS (optimize_size ? 1 : alpha_memory_latency);
1375 return true;
1376
1377 case NEG:
1378 if (! float_mode_p)
1379 {
1380 *total = COSTS_N_INSNS (1);
1381 return false;
1382 }
1383 /* FALLTHRU */
1384
1385 case ABS:
1386 if (! float_mode_p)
1387 {
1388 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1389 return false;
1390 }
1391 /* FALLTHRU */
1392
1393 case FLOAT:
1394 case UNSIGNED_FLOAT:
1395 case FIX:
1396 case UNSIGNED_FIX:
1397 case FLOAT_TRUNCATE:
1398 *total = cost_data->fp_add;
1399 return false;
1400
1401 case FLOAT_EXTEND:
1402 if (GET_CODE (XEXP (x, 0)) == MEM)
1403 *total = 0;
1404 else
1405 *total = cost_data->fp_add;
1406 return false;
1407
1408 default:
1409 return false;
1410 }
1411 }
1412 \f
1413 /* REF is an alignable memory location. Place an aligned SImode
1414 reference into *PALIGNED_MEM and the number of bits to shift into
 1415    *PBITNUM.  Out-of-range stack slot addresses are handled here via
 1416    find_replacement during reload.  */
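 /* For example, on a little-endian target a HImode reference at byte
    offset 6 from an aligned base yields an SImode *PALIGNED_MEM covering
    bytes 4-7 and a *PBITNUM of 16.  */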
1417
1418 void
1419 get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1420 {
1421 rtx base;
1422 HOST_WIDE_INT disp, offset;
1423
1424 gcc_assert (GET_CODE (ref) == MEM);
1425
1426 if (reload_in_progress
1427 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1428 {
1429 base = find_replacement (&XEXP (ref, 0));
1430 gcc_assert (memory_address_p (GET_MODE (ref), base));
1431 }
1432 else
1433 base = XEXP (ref, 0);
1434
1435 if (GET_CODE (base) == PLUS)
1436 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1437 else
1438 disp = 0;
1439
1440 /* Find the byte offset within an aligned word. If the memory itself is
1441 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1442 will have examined the base register and determined it is aligned, and
1443 thus displacements from it are naturally alignable. */
1444 if (MEM_ALIGN (ref) >= 32)
1445 offset = 0;
1446 else
1447 offset = disp & 3;
1448
1449 /* Access the entire aligned word. */
1450 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1451
1452 /* Convert the byte offset within the word to a bit offset. */
1453 if (WORDS_BIG_ENDIAN)
1454 offset = 32 - (GET_MODE_BITSIZE (GET_MODE (ref)) + offset * 8);
1455 else
1456 offset *= 8;
1457 *pbitnum = GEN_INT (offset);
1458 }
1459
1460 /* Similar, but just get the address. Handle the two reload cases.
1461 Add EXTRA_OFFSET to the address we return. */
1462
1463 rtx
1464 get_unaligned_address (rtx ref, int extra_offset)
1465 {
1466 rtx base;
1467 HOST_WIDE_INT offset = 0;
1468
1469 gcc_assert (GET_CODE (ref) == MEM);
1470
1471 if (reload_in_progress
1472 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1473 {
1474 base = find_replacement (&XEXP (ref, 0));
1475
1476 gcc_assert (memory_address_p (GET_MODE (ref), base));
1477 }
1478 else
1479 base = XEXP (ref, 0);
1480
1481 if (GET_CODE (base) == PLUS)
1482 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1483
1484 return plus_constant (base, offset + extra_offset);
1485 }
1486
1487 /* On the Alpha, all (non-symbolic) constants except zero go into
1488 a floating-point register via memory. Note that we cannot
1489 return anything that is not a subset of CLASS, and that some
1490 symbolic constants cannot be dropped to memory. */
1491
1492 enum reg_class
1493 alpha_preferred_reload_class(rtx x, enum reg_class class)
1494 {
1495 /* Zero is present in any register class. */
1496 if (x == CONST0_RTX (GET_MODE (x)))
1497 return class;
1498
1499 /* These sorts of constants we can easily drop to memory. */
1500 if (GET_CODE (x) == CONST_INT
1501 || GET_CODE (x) == CONST_DOUBLE
1502 || GET_CODE (x) == CONST_VECTOR)
1503 {
1504 if (class == FLOAT_REGS)
1505 return NO_REGS;
1506 if (class == ALL_REGS)
1507 return GENERAL_REGS;
1508 return class;
1509 }
1510
1511 /* All other kinds of constants should not (and in the case of HIGH
1512 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1513 secondary reload. */
1514 if (CONSTANT_P (x))
1515 return (class == ALL_REGS ? GENERAL_REGS : class);
1516
1517 return class;
1518 }
1519
1520 /* Loading and storing HImode or QImode values to and from memory
1521 usually requires a scratch register. The exceptions are loading
1522 QImode and HImode from an aligned address to a general register
1523 unless byte instructions are permitted.
1524
1525 We also cannot load an unaligned address or a paradoxical SUBREG
1526 into an FP register.
1527
1528 We also cannot do integral arithmetic into FP regs, as might result
1529 from register elimination into a DImode fp register. */
1530
1531 enum reg_class
1532 alpha_secondary_reload_class (enum reg_class class, enum machine_mode mode,
1533 rtx x, int in)
1534 {
1535 if ((mode == QImode || mode == HImode) && ! TARGET_BWX)
1536 {
1537 if (GET_CODE (x) == MEM
1538 || (GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
1539 || (GET_CODE (x) == SUBREG
1540 && (GET_CODE (SUBREG_REG (x)) == MEM
1541 || (GET_CODE (SUBREG_REG (x)) == REG
1542 && REGNO (SUBREG_REG (x)) >= FIRST_PSEUDO_REGISTER))))
1543 {
1544 if (!in || !aligned_memory_operand(x, mode))
1545 return GENERAL_REGS;
1546 }
1547 }
1548
1549 if (class == FLOAT_REGS)
1550 {
1551 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == AND)
1552 return GENERAL_REGS;
1553
1554 if (GET_CODE (x) == SUBREG
1555 && (GET_MODE_SIZE (GET_MODE (x))
1556 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
1557 return GENERAL_REGS;
1558
1559 if (in && INTEGRAL_MODE_P (mode)
1560 && ! (memory_operand (x, mode) || x == const0_rtx))
1561 return GENERAL_REGS;
1562 }
1563
1564 return NO_REGS;
1565 }
1566 \f
1567 /* Subfunction of the following function. Update the flags of any MEM
1568 found in part of X. */
1569
1570 static int
1571 alpha_set_memflags_1 (rtx *xp, void *data)
1572 {
1573 rtx x = *xp, orig = (rtx) data;
1574
1575 if (GET_CODE (x) != MEM)
1576 return 0;
1577
1578 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig);
1579 MEM_IN_STRUCT_P (x) = MEM_IN_STRUCT_P (orig);
1580 MEM_SCALAR_P (x) = MEM_SCALAR_P (orig);
1581 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig);
1582 MEM_READONLY_P (x) = MEM_READONLY_P (orig);
1583
1584 /* Sadly, we cannot use alias sets because the extra aliasing
1585 produced by the AND interferes. Given that two-byte quantities
1586 are the only thing we would be able to differentiate anyway,
1587 there does not seem to be any point in convoluting the early
1588 out of the alias check. */
1589
1590 return -1;
1591 }
1592
1593 /* Given INSN, which is an INSN list or the PATTERN of a single insn
1594 generated to perform a memory operation, look for any MEMs in either
 1595    a SET_DEST or a SET_SRC and copy the volatile, in-struct, scalar,
 1596    no-trap, and read-only flags from REF into each of the MEMs found.  If REF is not
1597 a MEM, don't do anything. */
1598
1599 void
1600 alpha_set_memflags (rtx insn, rtx ref)
1601 {
1602 rtx *base_ptr;
1603
1604 if (GET_CODE (ref) != MEM)
1605 return;
1606
1607 /* This is only called from alpha.md, after having had something
1608 generated from one of the insn patterns. So if everything is
1609 zero, the pattern is already up-to-date. */
1610 if (!MEM_VOLATILE_P (ref)
1611 && !MEM_IN_STRUCT_P (ref)
1612 && !MEM_SCALAR_P (ref)
1613 && !MEM_NOTRAP_P (ref)
1614 && !MEM_READONLY_P (ref))
1615 return;
1616
1617 if (INSN_P (insn))
1618 base_ptr = &PATTERN (insn);
1619 else
1620 base_ptr = &insn;
1621 for_each_rtx (base_ptr, alpha_set_memflags_1, (void *) ref);
1622 }
1623 \f
1624 static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT,
1625 int, bool);
1626
1627 /* Internal routine for alpha_emit_set_const to check for N or below insns.
1628 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1629 and return pc_rtx if successful. */
1630
1631 static rtx
1632 alpha_emit_set_const_1 (rtx target, enum machine_mode mode,
1633 HOST_WIDE_INT c, int n, bool no_output)
1634 {
1635 HOST_WIDE_INT new;
1636 int i, bits;
1637 /* Use a pseudo if highly optimizing and still generating RTL. */
1638 rtx subtarget
1639 = (flag_expensive_optimizations && !no_new_pseudos ? 0 : target);
1640 rtx temp, insn;
1641
1642 /* If this is a sign-extended 32-bit constant, we can do this in at most
1643 three insns, so do it if we have enough insns left. We always have
1644 a sign-extended 32-bit constant when compiling on a narrow machine. */
1645
1646 if (HOST_BITS_PER_WIDE_INT != 64
1647 || c >> 31 == -1 || c >> 31 == 0)
1648 {
1649 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1650 HOST_WIDE_INT tmp1 = c - low;
1651 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1652 HOST_WIDE_INT extra = 0;
1653
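      /* After the adjustment below, C == (HIGH << 16) + (EXTRA << 16) + LOW,
	 so the constant can be rebuilt with one or two ldah insns plus an lda.  */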
1654 /* If HIGH will be interpreted as negative but the constant is
 1655 	 positive, we must adjust it to do two ldah insns.  */
1656
1657 if ((high & 0x8000) != 0 && c >= 0)
1658 {
1659 extra = 0x4000;
1660 tmp1 -= 0x40000000;
1661 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1662 }
1663
1664 if (c == low || (low == 0 && extra == 0))
1665 {
1666 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1667 but that meant that we can't handle INT_MIN on 32-bit machines
1668 (like NT/Alpha), because we recurse indefinitely through
1669 emit_move_insn to gen_movdi. So instead, since we know exactly
1670 what we want, create it explicitly. */
1671
1672 if (no_output)
1673 return pc_rtx;
1674 if (target == NULL)
1675 target = gen_reg_rtx (mode);
1676 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1677 return target;
1678 }
1679 else if (n >= 2 + (extra != 0))
1680 {
1681 if (no_output)
1682 return pc_rtx;
1683 if (no_new_pseudos)
1684 {
1685 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
1686 temp = target;
1687 }
1688 else
1689 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1690 subtarget, mode);
1691
1692 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1693 This means that if we go through expand_binop, we'll try to
1694 generate extensions, etc, which will require new pseudos, which
1695 will fail during some split phases. The SImode add patterns
1696 still exist, but are not named. So build the insns by hand. */
1697
1698 if (extra != 0)
1699 {
1700 if (! subtarget)
1701 subtarget = gen_reg_rtx (mode);
1702 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1703 insn = gen_rtx_SET (VOIDmode, subtarget, insn);
1704 emit_insn (insn);
1705 temp = subtarget;
1706 }
1707
1708 if (target == NULL)
1709 target = gen_reg_rtx (mode);
1710 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1711 insn = gen_rtx_SET (VOIDmode, target, insn);
1712 emit_insn (insn);
1713 return target;
1714 }
1715 }
1716
1717 /* If we couldn't do it that way, try some other methods. But if we have
1718 no instructions left, don't bother. Likewise, if this is SImode and
1719 we can't make pseudos, we can't do anything since the expand_binop
1720 and expand_unop calls will widen and try to make pseudos. */
1721
1722 if (n == 1 || (mode == SImode && no_new_pseudos))
1723 return 0;
1724
1725 /* Next, see if we can load a related constant and then shift and possibly
1726 negate it to get the constant we want. Try this once each increasing
1727 numbers of insns. */
1728
1729 for (i = 1; i < n; i++)
1730 {
 1731       /* First, see if, minus some low bits, we have an easy load of
 1732 	 the high bits.  */
1733
1734 new = ((c & 0xffff) ^ 0x8000) - 0x8000;
1735 if (new != 0)
1736 {
1737 temp = alpha_emit_set_const (subtarget, mode, c - new, i, no_output);
1738 if (temp)
1739 {
1740 if (no_output)
1741 return temp;
1742 return expand_binop (mode, add_optab, temp, GEN_INT (new),
1743 target, 0, OPTAB_WIDEN);
1744 }
1745 }
1746
1747 /* Next try complementing. */
1748 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1749 if (temp)
1750 {
1751 if (no_output)
1752 return temp;
1753 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1754 }
1755
1756 /* Next try to form a constant and do a left shift. We can do this
1757 if some low-order bits are zero; the exact_log2 call below tells
1758 us that information. The bits we are shifting out could be any
1759 value, but here we'll just try the 0- and sign-extended forms of
1760 the constant. To try to increase the chance of having the same
1761 constant in more than one insn, start at the highest number of
1762 bits to shift, but try all possibilities in case a ZAPNOT will
1763 be useful. */
1764
1765 bits = exact_log2 (c & -c);
1766 if (bits > 0)
1767 for (; bits > 0; bits--)
1768 {
1769 new = c >> bits;
1770 temp = alpha_emit_set_const (subtarget, mode, new, i, no_output);
1771 if (!temp && c < 0)
1772 {
1773 new = (unsigned HOST_WIDE_INT)c >> bits;
1774 temp = alpha_emit_set_const (subtarget, mode, new,
1775 i, no_output);
1776 }
1777 if (temp)
1778 {
1779 if (no_output)
1780 return temp;
1781 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1782 target, 0, OPTAB_WIDEN);
1783 }
1784 }
1785
1786 /* Now try high-order zero bits. Here we try the shifted-in bits as
1787 all zero and all ones. Be careful to avoid shifting outside the
1788 mode and to avoid shifting outside the host wide int size. */
1789 /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1790 confuse the recursive call and set all of the high 32 bits. */
1791
1792 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1793 - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
1794 if (bits > 0)
1795 for (; bits > 0; bits--)
1796 {
1797 new = c << bits;
1798 temp = alpha_emit_set_const (subtarget, mode, new, i, no_output);
1799 if (!temp)
1800 {
1801 new = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1802 temp = alpha_emit_set_const (subtarget, mode, new,
1803 i, no_output);
1804 }
1805 if (temp)
1806 {
1807 if (no_output)
1808 return temp;
1809 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1810 target, 1, OPTAB_WIDEN);
1811 }
1812 }
1813
1814 /* Now try high-order 1 bits. We get that with a sign-extension.
1815 But one bit isn't enough here. Be careful to avoid shifting outside
1816 the mode and to avoid shifting outside the host wide int size. */
1817
1818 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1819 - floor_log2 (~ c) - 2);
1820 if (bits > 0)
1821 for (; bits > 0; bits--)
1822 {
1823 new = c << bits;
1824 temp = alpha_emit_set_const (subtarget, mode, new, i, no_output);
1825 if (!temp)
1826 {
1827 new = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1828 temp = alpha_emit_set_const (subtarget, mode, new,
1829 i, no_output);
1830 }
1831 if (temp)
1832 {
1833 if (no_output)
1834 return temp;
1835 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1836 target, 0, OPTAB_WIDEN);
1837 }
1838 }
1839 }
1840
1841 #if HOST_BITS_PER_WIDE_INT == 64
 1842   /* Finally, see if we can load a value into the target that is the same as the
1843 constant except that all bytes that are 0 are changed to be 0xff. If we
1844 can, then we can do a ZAPNOT to obtain the desired constant. */
1845
1846 new = c;
1847 for (i = 0; i < 64; i += 8)
1848 if ((new & ((HOST_WIDE_INT) 0xff << i)) == 0)
1849 new |= (HOST_WIDE_INT) 0xff << i;
1850
1851 /* We are only called for SImode and DImode. If this is SImode, ensure that
1852 we are sign extended to a full word. */
1853
1854 if (mode == SImode)
1855 new = ((new & 0xffffffff) ^ 0x80000000) - 0x80000000;
1856
1857 if (new != c)
1858 {
1859 temp = alpha_emit_set_const (subtarget, mode, new, n - 1, no_output);
1860 if (temp)
1861 {
1862 if (no_output)
1863 return temp;
1864 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new),
1865 target, 0, OPTAB_WIDEN);
1866 }
1867 }
1868 #endif
1869
1870 return 0;
1871 }
1872
1873 /* Try to output insns to set TARGET equal to the constant C if it can be
1874 done in less than N insns. Do all computations in MODE. Returns the place
1875 where the output has been placed if it can be done and the insns have been
1876 emitted. If it would take more than N insns, zero is returned and no
1877 insns and emitted. */
1878
1879 static rtx
1880 alpha_emit_set_const (rtx target, enum machine_mode mode,
1881 HOST_WIDE_INT c, int n, bool no_output)
1882 {
1883 enum machine_mode orig_mode = mode;
1884 rtx orig_target = target;
1885 rtx result = 0;
1886 int i;
1887
 1888   /* If we can't make any pseudos, TARGET is an SImode hard register, and we
 1889      can't load this constant in one insn, do this in DImode.  */
1890 if (no_new_pseudos && mode == SImode
1891 && GET_CODE (target) == REG && REGNO (target) < FIRST_PSEUDO_REGISTER)
1892 {
1893 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
1894 if (result)
1895 return result;
1896
1897 target = no_output ? NULL : gen_lowpart (DImode, target);
1898 mode = DImode;
1899 }
1900 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
1901 {
1902 target = no_output ? NULL : gen_lowpart (DImode, target);
1903 mode = DImode;
1904 }
1905
1906 /* Try 1 insn, then 2, then up to N. */
1907 for (i = 1; i <= n; i++)
1908 {
1909 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
1910 if (result)
1911 {
1912 rtx insn, set;
1913
1914 if (no_output)
1915 return result;
1916
1917 insn = get_last_insn ();
1918 set = single_set (insn);
1919 if (! CONSTANT_P (SET_SRC (set)))
1920 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
1921 break;
1922 }
1923 }
1924
1925 /* Allow for the case where we changed the mode of TARGET. */
1926 if (result)
1927 {
1928 if (result == target)
1929 result = orig_target;
1930 else if (mode != orig_mode)
1931 result = gen_lowpart (orig_mode, result);
1932 }
1933
1934 return result;
1935 }
1936
1937 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
1938 fall back to a straightforward decomposition. We do this to avoid
1939 exponential run times encountered when looking for longer sequences
1940 with alpha_emit_set_const. */
1941
1942 static rtx
1943 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
1944 {
1945 HOST_WIDE_INT d1, d2, d3, d4;
1946
1947 /* Decompose the entire word */
1948 #if HOST_BITS_PER_WIDE_INT >= 64
1949 gcc_assert (c2 == -(c1 < 0));
1950 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1951 c1 -= d1;
1952 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1953 c1 = (c1 - d2) >> 32;
1954 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1955 c1 -= d3;
1956 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1957 gcc_assert (c1 == d4);
1958 #else
1959 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1960 c1 -= d1;
1961 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1962 gcc_assert (c1 == d2);
1963 c2 += (d2 < 0);
1964 d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
1965 c2 -= d3;
1966 d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1967 gcc_assert (c2 == d4);
1968 #endif
1969
1970 /* Construct the high word */
1971 if (d4)
1972 {
1973 emit_move_insn (target, GEN_INT (d4));
1974 if (d3)
1975 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
1976 }
1977 else
1978 emit_move_insn (target, GEN_INT (d3));
1979
1980 /* Shift it into place */
1981 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
1982
1983 /* Add in the low bits. */
1984 if (d2)
1985 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
1986 if (d1)
1987 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
1988
1989 return target;
1990 }
1991
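/* Worked example (illustrative, not from the original sources): on a
   64-bit host, C1 = 0x1234567887654321 decomposes as d1 = 0x4321,
   d2 = -0x789b0000, d3 = 0x5679, d4 = 0x12340000, so the emitted
   sequence is roughly

	ldah	$t,0x1234($31)	# 0x12340000
	lda	$t,0x5679($t)	# 0x12345679
	sll	$t,32,$t	# 0x1234567900000000
	ldah	$t,0x8765($t)	# adds -0x789b0000
	lda	$t,0x4321($t)	# 0x1234567887654321  */
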
1992 /* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, return
1993 the low 64 bits. */
1994
1995 static void
1996 alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1)
1997 {
1998 HOST_WIDE_INT i0, i1;
1999
2000 if (GET_CODE (x) == CONST_VECTOR)
2001 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2002
2003
2004 if (GET_CODE (x) == CONST_INT)
2005 {
2006 i0 = INTVAL (x);
2007 i1 = -(i0 < 0);
2008 }
2009 else if (HOST_BITS_PER_WIDE_INT >= 64)
2010 {
2011 i0 = CONST_DOUBLE_LOW (x);
2012 i1 = -(i0 < 0);
2013 }
2014 else
2015 {
2016 i0 = CONST_DOUBLE_LOW (x);
2017 i1 = CONST_DOUBLE_HIGH (x);
2018 }
2019
2020 *p0 = i0;
2021 *p1 = i1;
2022 }
2023
2024 /* Implement LEGITIMATE_CONSTANT_P. This is all constants for which we
2025 are willing to load the value into a register via a move pattern.
2026 Normally this is all symbolic constants, integral constants that
2027 take three or fewer instructions, and floating-point zero. */
2028
2029 bool
2030 alpha_legitimate_constant_p (rtx x)
2031 {
2032 enum machine_mode mode = GET_MODE (x);
2033 HOST_WIDE_INT i0, i1;
2034
2035 switch (GET_CODE (x))
2036 {
2037 case CONST:
2038 case LABEL_REF:
2039 case HIGH:
2040 return true;
2041
2042 case SYMBOL_REF:
2043 /* TLS symbols are never valid. */
2044 return SYMBOL_REF_TLS_MODEL (x) == 0;
2045
2046 case CONST_DOUBLE:
2047 if (x == CONST0_RTX (mode))
2048 return true;
2049 if (FLOAT_MODE_P (mode))
2050 return false;
2051 goto do_integer;
2052
2053 case CONST_VECTOR:
2054 if (x == CONST0_RTX (mode))
2055 return true;
2056 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2057 return false;
2058 if (GET_MODE_SIZE (mode) != 8)
2059 return false;
2060 goto do_integer;
2061
2062 case CONST_INT:
2063 do_integer:
2064 if (TARGET_BUILD_CONSTANTS)
2065 return true;
2066 alpha_extract_integer (x, &i0, &i1);
2067 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2068 return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL;
2069 return false;
2070
2071 default:
2072 return false;
2073 }
2074 }
2075
2076 /* Operand 1 is known to be a constant, and should require more than one
2077 instruction to load. Emit that multi-part load. */
2078
2079 bool
2080 alpha_split_const_mov (enum machine_mode mode, rtx *operands)
2081 {
2082 HOST_WIDE_INT i0, i1;
2083 rtx temp = NULL_RTX;
2084
2085 alpha_extract_integer (operands[1], &i0, &i1);
2086
2087 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2088 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2089
2090 if (!temp && TARGET_BUILD_CONSTANTS)
2091 temp = alpha_emit_set_long_const (operands[0], i0, i1);
2092
2093 if (temp)
2094 {
2095 if (!rtx_equal_p (operands[0], temp))
2096 emit_move_insn (operands[0], temp);
2097 return true;
2098 }
2099
2100 return false;
2101 }
2102
2103 /* Expand a move instruction; return true if all work is done.
2104 We don't handle non-bwx subword loads here. */
2105
2106 bool
2107 alpha_expand_mov (enum machine_mode mode, rtx *operands)
2108 {
2109 /* If the output is not a register, the input must be. */
2110 if (GET_CODE (operands[0]) == MEM
2111 && ! reg_or_0_operand (operands[1], mode))
2112 operands[1] = force_reg (mode, operands[1]);
2113
2114 /* Allow legitimize_address to perform some simplifications. */
2115 if (mode == Pmode && symbolic_operand (operands[1], mode))
2116 {
2117 rtx tmp;
2118
2119 tmp = alpha_legitimize_address (operands[1], operands[0], mode);
2120 if (tmp)
2121 {
2122 if (tmp == operands[0])
2123 return true;
2124 operands[1] = tmp;
2125 return false;
2126 }
2127 }
2128
2129 /* Early out for non-constants and valid constants. */
2130 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2131 return false;
2132
2133 /* Split large integers. */
2134 if (GET_CODE (operands[1]) == CONST_INT
2135 || GET_CODE (operands[1]) == CONST_DOUBLE
2136 || GET_CODE (operands[1]) == CONST_VECTOR)
2137 {
2138 if (alpha_split_const_mov (mode, operands))
2139 return true;
2140 }
2141
2142 /* Otherwise we've nothing left but to drop the thing to memory. */
2143 operands[1] = force_const_mem (mode, operands[1]);
2144 if (reload_in_progress)
2145 {
2146 emit_move_insn (operands[0], XEXP (operands[1], 0));
2147 operands[1] = copy_rtx (operands[1]);
2148 XEXP (operands[1], 0) = operands[0];
2149 }
2150 else
2151 operands[1] = validize_mem (operands[1]);
2152 return false;
2153 }
2154
2155 /* Expand a non-bwx QImode or HImode move instruction;
2156 return true if all work is done. */
2157
2158 bool
2159 alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands)
2160 {
2161 /* If the output is not a register, the input must be. */
2162 if (GET_CODE (operands[0]) == MEM)
2163 operands[1] = force_reg (mode, operands[1]);
2164
2165 /* Handle four memory cases, unaligned and aligned for either the input
2166 or the output. The only case where we can be called during reload is
2167 for aligned loads; all other cases require temporaries. */
2168
2169 if (GET_CODE (operands[1]) == MEM
2170 || (GET_CODE (operands[1]) == SUBREG
2171 && GET_CODE (SUBREG_REG (operands[1])) == MEM)
2172 || (reload_in_progress && GET_CODE (operands[1]) == REG
2173 && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER)
2174 || (reload_in_progress && GET_CODE (operands[1]) == SUBREG
2175 && GET_CODE (SUBREG_REG (operands[1])) == REG
2176 && REGNO (SUBREG_REG (operands[1])) >= FIRST_PSEUDO_REGISTER))
2177 {
2178 if (aligned_memory_operand (operands[1], mode))
2179 {
2180 if (reload_in_progress)
2181 {
2182 emit_insn ((mode == QImode
2183 ? gen_reload_inqi_help
2184 : gen_reload_inhi_help)
2185 (operands[0], operands[1],
2186 gen_rtx_REG (SImode, REGNO (operands[0]))));
2187 }
2188 else
2189 {
2190 rtx aligned_mem, bitnum;
2191 rtx scratch = gen_reg_rtx (SImode);
2192 rtx subtarget;
2193 bool copyout;
2194
2195 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2196
2197 subtarget = operands[0];
2198 if (GET_CODE (subtarget) == REG)
2199 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2200 else
2201 subtarget = gen_reg_rtx (DImode), copyout = true;
2202
2203 emit_insn ((mode == QImode
2204 ? gen_aligned_loadqi
2205 : gen_aligned_loadhi)
2206 (subtarget, aligned_mem, bitnum, scratch));
2207
2208 if (copyout)
2209 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2210 }
2211 }
2212 else
2213 {
2214 /* Don't pass these as parameters since that makes the generated
2215 code depend on parameter evaluation order which will cause
2216 bootstrap failures. */
2217
2218 rtx temp1, temp2, seq, subtarget;
2219 bool copyout;
2220
2221 temp1 = gen_reg_rtx (DImode);
2222 temp2 = gen_reg_rtx (DImode);
2223
2224 subtarget = operands[0];
2225 if (GET_CODE (subtarget) == REG)
2226 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2227 else
2228 subtarget = gen_reg_rtx (DImode), copyout = true;
2229
2230 seq = ((mode == QImode
2231 ? gen_unaligned_loadqi
2232 : gen_unaligned_loadhi)
2233 (subtarget, get_unaligned_address (operands[1], 0),
2234 temp1, temp2));
2235 alpha_set_memflags (seq, operands[1]);
2236 emit_insn (seq);
2237
2238 if (copyout)
2239 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2240 }
2241 return true;
2242 }
2243
2244 if (GET_CODE (operands[0]) == MEM
2245 || (GET_CODE (operands[0]) == SUBREG
2246 && GET_CODE (SUBREG_REG (operands[0])) == MEM)
2247 || (reload_in_progress && GET_CODE (operands[0]) == REG
2248 && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
2249 || (reload_in_progress && GET_CODE (operands[0]) == SUBREG
2250 && GET_CODE (SUBREG_REG (operands[0])) == REG
2251 && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
2252 {
2253 if (aligned_memory_operand (operands[0], mode))
2254 {
2255 rtx aligned_mem, bitnum;
2256 rtx temp1 = gen_reg_rtx (SImode);
2257 rtx temp2 = gen_reg_rtx (SImode);
2258
2259 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2260
2261 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2262 temp1, temp2));
2263 }
2264 else
2265 {
2266 rtx temp1 = gen_reg_rtx (DImode);
2267 rtx temp2 = gen_reg_rtx (DImode);
2268 rtx temp3 = gen_reg_rtx (DImode);
2269 rtx seq = ((mode == QImode
2270 ? gen_unaligned_storeqi
2271 : gen_unaligned_storehi)
2272 (get_unaligned_address (operands[0], 0),
2273 operands[1], temp1, temp2, temp3));
2274
2275 alpha_set_memflags (seq, operands[0]);
2276 emit_insn (seq);
2277 }
2278 return true;
2279 }
2280
2281 return false;
2282 }
2283
2284 /* Implement the movmisalign patterns. One of the operands is a memory
2285 that is not naturally aligned. Emit instructions to load it. */
2286
2287 void
2288 alpha_expand_movmisalign (enum machine_mode mode, rtx *operands)
2289 {
2290 /* Honor misaligned loads; these are the cases we promised to handle. */
2291 if (MEM_P (operands[1]))
2292 {
2293 rtx tmp;
2294
2295 if (register_operand (operands[0], mode))
2296 tmp = operands[0];
2297 else
2298 tmp = gen_reg_rtx (mode);
2299
2300 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2301 if (tmp != operands[0])
2302 emit_move_insn (operands[0], tmp);
2303 }
2304 else if (MEM_P (operands[0]))
2305 {
2306 if (!reg_or_0_operand (operands[1], mode))
2307 operands[1] = force_reg (mode, operands[1]);
2308 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2309 }
2310 else
2311 gcc_unreachable ();
2312 }
2313
2314 /* Generate an unsigned DImode to FP conversion. This is the same code
2315 optabs would emit if we didn't have TFmode patterns.
2316
2317 For SFmode, this is the only construction I've found that can pass
2318 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2319 intermediates will work, because you'll get intermediate rounding
2320 that ruins the end result. Some of this could be fixed by turning
2321 on round-to-positive-infinity, but that requires diddling the fpsr,
2322 which kills performance. I tried turning this around and converting
2323 to a negative number, so that I could turn on /m, but either I did
2324 it wrong or there's something else, because I wound up with the exact
2325 same single-bit error. There is a branch-less form of this same code:
2326
2327 srl $16,1,$1
2328 and $16,1,$2
2329 cmplt $16,0,$3
2330 or $1,$2,$2
2331 cmovge $16,$16,$2
2332 itoft $3,$f10
2333 itoft $2,$f11
2334 cvtqs $f11,$f11
2335 adds $f11,$f11,$f0
2336 fcmoveq $f10,$f11,$f0
2337
2338 I'm not using it because it's the same number of instructions as
2339 this branch-full form, and it has more serialized long latency
2340 instructions on the critical path.
2341
2342 For DFmode, we can avoid rounding errors by breaking up the word
2343 into two pieces, converting them separately, and adding them back:
2344
2345 LC0: .long 0,0x5f800000
2346
2347 itoft $16,$f11
2348 lda $2,LC0
2349 cmplt $16,0,$1
2350 cpyse $f11,$f31,$f10
2351 cpyse $f31,$f11,$f11
2352 s4addq $1,$2,$1
2353 lds $f12,0($1)
2354 cvtqt $f10,$f10
2355 cvtqt $f11,$f11
2356 addt $f12,$f10,$f0
2357 addt $f0,$f11,$f0
2358
2359 This doesn't seem to be a clear-cut win over the optabs form.
2360 It probably all depends on the distribution of numbers being
2361 converted -- in the optabs form, all but high-bit-set has a
2362 much lower minimum execution time. */
2363
2364 void
2365 alpha_emit_floatuns (rtx operands[2])
2366 {
2367 rtx neglab, donelab, i0, i1, f0, in, out;
2368 enum machine_mode mode;
2369
2370 out = operands[0];
2371 in = force_reg (DImode, operands[1]);
2372 mode = GET_MODE (out);
2373 neglab = gen_label_rtx ();
2374 donelab = gen_label_rtx ();
2375 i0 = gen_reg_rtx (DImode);
2376 i1 = gen_reg_rtx (DImode);
2377 f0 = gen_reg_rtx (mode);
2378
2379 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2380
2381 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
2382 emit_jump_insn (gen_jump (donelab));
2383 emit_barrier ();
2384
2385 emit_label (neglab);
2386
2387 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2388 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2389 emit_insn (gen_iordi3 (i0, i0, i1));
2390 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
2391 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
2392
2393 emit_label (donelab);
2394 }
2395
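/* Illustrative sketch (not from the original sources) of the scalar
   computation the RTL emitted above performs for an unsigned 64-bit
   input X, where FP stands for the target float mode:

     if ((HOST_WIDE_INT) x >= 0)
       result = (FP) x;				plain signed conversion
     else
       {
	 halve with the low bit folded back in (round to odd), convert
	 as a signed value, then double; the sticky low bit avoids a
	 second rounding step:

	 unsigned HOST_WIDE_INT half = (x >> 1) | (x & 1);
	 result = (FP) (HOST_WIDE_INT) half;
	 result = result + result;
       }  */
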
2396 /* Generate the comparison for a conditional branch. */
2397
2398 rtx
2399 alpha_emit_conditional_branch (enum rtx_code code)
2400 {
2401 enum rtx_code cmp_code, branch_code;
2402 enum machine_mode cmp_mode, branch_mode = VOIDmode;
2403 rtx op0 = alpha_compare.op0, op1 = alpha_compare.op1;
2404 rtx tem;
2405
2406 if (alpha_compare.fp_p && GET_MODE (op0) == TFmode)
2407 {
2408 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2409 op1 = const0_rtx;
2410 alpha_compare.fp_p = 0;
2411 }
2412
2413 /* The general case: fold the comparison code to the types of compares
2414 that we have, choosing the branch as necessary. */
2415 switch (code)
2416 {
2417 case EQ: case LE: case LT: case LEU: case LTU:
2418 case UNORDERED:
2419 /* We have these compares: */
2420 cmp_code = code, branch_code = NE;
2421 break;
2422
2423 case NE:
2424 case ORDERED:
2425 /* These must be reversed. */
2426 cmp_code = reverse_condition (code), branch_code = EQ;
2427 break;
2428
2429 case GE: case GT: case GEU: case GTU:
2430 /* For FP, we swap them, for INT, we reverse them. */
2431 if (alpha_compare.fp_p)
2432 {
2433 cmp_code = swap_condition (code);
2434 branch_code = NE;
2435 tem = op0, op0 = op1, op1 = tem;
2436 }
2437 else
2438 {
2439 cmp_code = reverse_condition (code);
2440 branch_code = EQ;
2441 }
2442 break;
2443
2444 default:
2445 gcc_unreachable ();
2446 }
2447
2448 if (alpha_compare.fp_p)
2449 {
2450 cmp_mode = DFmode;
2451 if (flag_unsafe_math_optimizations)
2452 {
2453 /* When we are not as concerned about non-finite values, and we
2454 are comparing against zero, we can branch directly. */
2455 if (op1 == CONST0_RTX (DFmode))
2456 cmp_code = UNKNOWN, branch_code = code;
2457 else if (op0 == CONST0_RTX (DFmode))
2458 {
2459 /* Undo the swap we probably did just above. */
2460 tem = op0, op0 = op1, op1 = tem;
2461 branch_code = swap_condition (cmp_code);
2462 cmp_code = UNKNOWN;
2463 }
2464 }
2465 else
2466 {
2467 /* ??? We mark the branch mode to be CCmode to prevent the
2468 compare and branch from being combined, since the compare
2469 insn follows IEEE rules that the branch does not. */
2470 branch_mode = CCmode;
2471 }
2472 }
2473 else
2474 {
2475 cmp_mode = DImode;
2476
2477 /* The following optimizations are only for signed compares. */
2478 if (code != LEU && code != LTU && code != GEU && code != GTU)
2479 {
2480 /* Whee. Compare and branch against 0 directly. */
2481 if (op1 == const0_rtx)
2482 cmp_code = UNKNOWN, branch_code = code;
2483
2484 /* If the constant doesn't fit into an immediate, but can
2485 be generated by lda/ldah, we adjust the argument and
2486 compare against zero, so we can use beq/bne directly. */
2487 /* ??? Don't do this when comparing against symbols, otherwise
2488 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2489 be declared false out of hand (at least for non-weak). */
2490 else if (GET_CODE (op1) == CONST_INT
2491 && (code == EQ || code == NE)
2492 && !(symbolic_operand (op0, VOIDmode)
2493 || (GET_CODE (op0) == REG && REG_POINTER (op0))))
2494 {
2495 rtx n_op1 = GEN_INT (-INTVAL (op1));
2496
2497 if (! satisfies_constraint_I (op1)
2498 && (satisfies_constraint_K (n_op1)
2499 || satisfies_constraint_L (n_op1)))
2500 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2501 }
2502 }
2503
2504 if (!reg_or_0_operand (op0, DImode))
2505 op0 = force_reg (DImode, op0);
2506 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2507 op1 = force_reg (DImode, op1);
2508 }
2509
2510 /* Emit an initial compare instruction, if necessary. */
2511 tem = op0;
2512 if (cmp_code != UNKNOWN)
2513 {
2514 tem = gen_reg_rtx (cmp_mode);
2515 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2516 }
2517
2518 /* Zero the operands. */
2519 memset (&alpha_compare, 0, sizeof (alpha_compare));
2520
2521 /* Return the branch comparison. */
2522 return gen_rtx_fmt_ee (branch_code, branch_mode, tem, CONST0_RTX (cmp_mode));
2523 }
2524
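/* Illustrative example (not from the original sources): for a signed
   integer branch on A > B, GT is reversed to LE with a branch on EQ,
   so the emitted code is roughly

	cmple	$a,$b,$t
	beq	$t,L		# taken exactly when A > B

   while for a floating-point A > B the operands are swapped instead:

	cmptlt	$fb,$fa,$ft
	fbne	$ft,L  */
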
2525 /* Certain simplifications can be done to make invalid setcc operations
2526 valid. Return the final comparison, or NULL if we can't work. */
2527
2528 rtx
2529 alpha_emit_setcc (enum rtx_code code)
2530 {
2531 enum rtx_code cmp_code;
2532 rtx op0 = alpha_compare.op0, op1 = alpha_compare.op1;
2533 int fp_p = alpha_compare.fp_p;
2534 rtx tmp;
2535
2536 /* Zero the operands. */
2537 memset (&alpha_compare, 0, sizeof (alpha_compare));
2538
2539 if (fp_p && GET_MODE (op0) == TFmode)
2540 {
2541 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2542 op1 = const0_rtx;
2543 fp_p = 0;
2544 }
2545
2546 if (fp_p && !TARGET_FIX)
2547 return NULL_RTX;
2548
2549 /* The general case: fold the comparison code to the types of compares
2550 that we have, choosing the branch as necessary. */
2551
2552 cmp_code = UNKNOWN;
2553 switch (code)
2554 {
2555 case EQ: case LE: case LT: case LEU: case LTU:
2556 case UNORDERED:
2557 /* We have these compares. */
2558 if (fp_p)
2559 cmp_code = code, code = NE;
2560 break;
2561
2562 case NE:
2563 if (!fp_p && op1 == const0_rtx)
2564 break;
2565 /* FALLTHRU */
2566
2567 case ORDERED:
2568 cmp_code = reverse_condition (code);
2569 code = EQ;
2570 break;
2571
2572 case GE: case GT: case GEU: case GTU:
2573 /* These normally need swapping, but for integer zero we have
2574 special patterns that recognize swapped operands. */
2575 if (!fp_p && op1 == const0_rtx)
2576 break;
2577 code = swap_condition (code);
2578 if (fp_p)
2579 cmp_code = code, code = NE;
2580 tmp = op0, op0 = op1, op1 = tmp;
2581 break;
2582
2583 default:
2584 gcc_unreachable ();
2585 }
2586
2587 if (!fp_p)
2588 {
2589 if (!register_operand (op0, DImode))
2590 op0 = force_reg (DImode, op0);
2591 if (!reg_or_8bit_operand (op1, DImode))
2592 op1 = force_reg (DImode, op1);
2593 }
2594
2595 /* Emit an initial compare instruction, if necessary. */
2596 if (cmp_code != UNKNOWN)
2597 {
2598 enum machine_mode mode = fp_p ? DFmode : DImode;
2599
2600 tmp = gen_reg_rtx (mode);
2601 emit_insn (gen_rtx_SET (VOIDmode, tmp,
2602 gen_rtx_fmt_ee (cmp_code, mode, op0, op1)));
2603
2604 op0 = fp_p ? gen_lowpart (DImode, tmp) : tmp;
2605 op1 = const0_rtx;
2606 }
2607
2608 /* Return the setcc comparison. */
2609 return gen_rtx_fmt_ee (code, DImode, op0, op1);
2610 }
2611
2612
2613 /* Rewrite a comparison against zero CMP of the form
2614 (CODE (cc0) (const_int 0)) so it can be written validly in
2615 a conditional move (if_then_else CMP ...).
2616 If both of the operands that set cc0 are nonzero we must emit
2617 an insn to perform the compare (it can't be done within
2618 the conditional move). */
2619
2620 rtx
2621 alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
2622 {
2623 enum rtx_code code = GET_CODE (cmp);
2624 enum rtx_code cmov_code = NE;
2625 rtx op0 = alpha_compare.op0;
2626 rtx op1 = alpha_compare.op1;
2627 int fp_p = alpha_compare.fp_p;
2628 enum machine_mode cmp_mode
2629 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2630 enum machine_mode cmp_op_mode = fp_p ? DFmode : DImode;
2631 enum machine_mode cmov_mode = VOIDmode;
2632 int local_fast_math = flag_unsafe_math_optimizations;
2633 rtx tem;
2634
2635 /* Zero the operands. */
2636 memset (&alpha_compare, 0, sizeof (alpha_compare));
2637
2638 if (fp_p != FLOAT_MODE_P (mode))
2639 {
2640 enum rtx_code cmp_code;
2641
2642 if (! TARGET_FIX)
2643 return 0;
2644
2645 /* If we have fp<->int register move instructions, do a cmov by
2646 performing the comparison in fp registers, and move the
2647 zero/nonzero value to integer registers, where we can then
2648 use a normal cmov, or vice-versa. */
2649
2650 switch (code)
2651 {
2652 case EQ: case LE: case LT: case LEU: case LTU:
2653 /* We have these compares. */
2654 cmp_code = code, code = NE;
2655 break;
2656
2657 case NE:
2658 /* This must be reversed. */
2659 cmp_code = EQ, code = EQ;
2660 break;
2661
2662 case GE: case GT: case GEU: case GTU:
2663 /* These normally need swapping, but for integer zero we have
2664 special patterns that recognize swapped operands. */
2665 if (!fp_p && op1 == const0_rtx)
2666 cmp_code = code, code = NE;
2667 else
2668 {
2669 cmp_code = swap_condition (code);
2670 code = NE;
2671 tem = op0, op0 = op1, op1 = tem;
2672 }
2673 break;
2674
2675 default:
2676 gcc_unreachable ();
2677 }
2678
2679 tem = gen_reg_rtx (cmp_op_mode);
2680 emit_insn (gen_rtx_SET (VOIDmode, tem,
2681 gen_rtx_fmt_ee (cmp_code, cmp_op_mode,
2682 op0, op1)));
2683
2684 cmp_mode = cmp_op_mode = fp_p ? DImode : DFmode;
2685 op0 = gen_lowpart (cmp_op_mode, tem);
2686 op1 = CONST0_RTX (cmp_op_mode);
2687 fp_p = !fp_p;
2688 local_fast_math = 1;
2689 }
2690
2691 /* We may be able to use a conditional move directly.
2692 This avoids emitting spurious compares. */
2693 if (signed_comparison_operator (cmp, VOIDmode)
2694 && (!fp_p || local_fast_math)
2695 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2696 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2697
2698 /* We can't put the comparison inside the conditional move;
2699 emit a compare instruction and put that inside the
2700 conditional move. Make sure we emit only comparisons we have;
2701 swap or reverse as necessary. */
2702
2703 if (no_new_pseudos)
2704 return NULL_RTX;
2705
2706 switch (code)
2707 {
2708 case EQ: case LE: case LT: case LEU: case LTU:
2709 /* We have these compares: */
2710 break;
2711
2712 case NE:
2713 /* This must be reversed. */
2714 code = reverse_condition (code);
2715 cmov_code = EQ;
2716 break;
2717
2718 case GE: case GT: case GEU: case GTU:
2719 /* These must be swapped. */
2720 if (op1 != CONST0_RTX (cmp_mode))
2721 {
2722 code = swap_condition (code);
2723 tem = op0, op0 = op1, op1 = tem;
2724 }
2725 break;
2726
2727 default:
2728 gcc_unreachable ();
2729 }
2730
2731 if (!fp_p)
2732 {
2733 if (!reg_or_0_operand (op0, DImode))
2734 op0 = force_reg (DImode, op0);
2735 if (!reg_or_8bit_operand (op1, DImode))
2736 op1 = force_reg (DImode, op1);
2737 }
2738
2739 /* ??? We mark the branch mode to be CCmode to prevent the compare
2740 and cmov from being combined, since the compare insn follows IEEE
2741 rules that the cmov does not. */
2742 if (fp_p && !local_fast_math)
2743 cmov_mode = CCmode;
2744
2745 tem = gen_reg_rtx (cmp_op_mode);
2746 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_op_mode, op0, op1));
2747 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_op_mode));
2748 }
2749
2750 /* Simplify a conditional move of two constants into a setcc with
2751 arithmetic. This is done with a splitter since combine would
2752 just undo the work if done during code generation. It also catches
2753 cases we wouldn't have before cse. */
2754
2755 int
2756 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2757 rtx t_rtx, rtx f_rtx)
2758 {
2759 HOST_WIDE_INT t, f, diff;
2760 enum machine_mode mode;
2761 rtx target, subtarget, tmp;
2762
2763 mode = GET_MODE (dest);
2764 t = INTVAL (t_rtx);
2765 f = INTVAL (f_rtx);
2766 diff = t - f;
2767
2768 if (((code == NE || code == EQ) && diff < 0)
2769 || (code == GE || code == GT))
2770 {
2771 code = reverse_condition (code);
2772 diff = t, t = f, f = diff;
2773 diff = t - f;
2774 }
2775
2776 subtarget = target = dest;
2777 if (mode != DImode)
2778 {
2779 target = gen_lowpart (DImode, dest);
2780 if (! no_new_pseudos)
2781 subtarget = gen_reg_rtx (DImode);
2782 else
2783 subtarget = target;
2784 }
2785 /* Below, we must be careful to use copy_rtx on target and subtarget
2786 in intermediate insns, as they may be a subreg rtx, which may not
2787 be shared. */
2788
2789 if (f == 0 && exact_log2 (diff) > 0
2790 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2791 viable over a longer latency cmove. On EV5, the E0 slot is a
2792 scarce resource, and on EV4 shift has the same latency as a cmove. */
2793 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2794 {
2795 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2796 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2797
2798 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2799 GEN_INT (exact_log2 (t)));
2800 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2801 }
2802 else if (f == 0 && t == -1)
2803 {
2804 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2805 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2806
2807 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2808 }
2809 else if (diff == 1 || diff == 4 || diff == 8)
2810 {
2811 rtx add_op;
2812
2813 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2814 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2815
2816 if (diff == 1)
2817 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2818 else
2819 {
2820 add_op = GEN_INT (f);
2821 if (sext_add_operand (add_op, mode))
2822 {
2823 tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2824 GEN_INT (diff));
2825 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2826 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2827 }
2828 else
2829 return 0;
2830 }
2831 }
2832 else
2833 return 0;
2834
2835 return 1;
2836 }
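
/* Illustrative example (not from the original sources): a conditional
   move between the constants 5 and 1, e.g. (x == 0 ? 5 : 1), has
   diff == 4 and f == 1, so it splits into a setcc followed by a scaled
   add, roughly

	cmpeq	$x,0,$t
	s4addq	$t,1,$d		# 4*1+1 = 5 when true, 4*0+1 = 1 when false

   while (cond ? 8 : 0) would use a setcc followed by a shift left by 3,
   and (cond ? -1 : 0) a setcc followed by a negate.  */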
2837 \f
2838 /* Look up the function X_floating library function name for the
2839 given operation. */
2840
2841 struct xfloating_op GTY(())
2842 {
2843 const enum rtx_code code;
2844 const char *const GTY((skip)) osf_func;
2845 const char *const GTY((skip)) vms_func;
2846 rtx libcall;
2847 };
2848
2849 static GTY(()) struct xfloating_op xfloating_ops[] =
2850 {
2851 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2852 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2853 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2854 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2855 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2856 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2857 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2858 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2859 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2860 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2861 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2862 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2863 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2864 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2865 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2866 };
2867
2868 static GTY(()) struct xfloating_op vax_cvt_ops[] =
2869 {
2870 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
2871 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
2872 };
2873
2874 static rtx
2875 alpha_lookup_xfloating_lib_func (enum rtx_code code)
2876 {
2877 struct xfloating_op *ops = xfloating_ops;
2878 long n = ARRAY_SIZE (xfloating_ops);
2879 long i;
2880
2881 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
2882
2883 /* How irritating. Nothing to key off for the main table. */
2884 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
2885 {
2886 ops = vax_cvt_ops;
2887 n = ARRAY_SIZE (vax_cvt_ops);
2888 }
2889
2890 for (i = 0; i < n; ++i, ++ops)
2891 if (ops->code == code)
2892 {
2893 rtx func = ops->libcall;
2894 if (!func)
2895 {
2896 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
2897 ? ops->vms_func : ops->osf_func);
2898 ops->libcall = func;
2899 }
2900 return func;
2901 }
2902
2903 gcc_unreachable ();
2904 }
2905
2906 /* Most X_floating operations take the rounding mode as an argument.
2907 Compute that here. */
2908
2909 static int
2910 alpha_compute_xfloating_mode_arg (enum rtx_code code,
2911 enum alpha_fp_rounding_mode round)
2912 {
2913 int mode;
2914
2915 switch (round)
2916 {
2917 case ALPHA_FPRM_NORM:
2918 mode = 2;
2919 break;
2920 case ALPHA_FPRM_MINF:
2921 mode = 1;
2922 break;
2923 case ALPHA_FPRM_CHOP:
2924 mode = 0;
2925 break;
2926 case ALPHA_FPRM_DYN:
2927 mode = 4;
2928 break;
2929 default:
2930 gcc_unreachable ();
2931
2932 /* XXX For reference, round to +inf is mode = 3. */
2933 }
2934
2935 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
2936 mode |= 0x10000;
2937
2938 return mode;
2939 }
2940
2941 /* Emit an X_floating library function call.
2942
2943 Note that these functions do not follow normal calling conventions:
2944 TFmode arguments are passed in two integer registers (as opposed to
2945 indirect); TFmode return values appear in R16+R17.
2946
2947 FUNC is the function to call.
2948 TARGET is where the output belongs.
2949 OPERANDS are the inputs.
2950 NOPERANDS is the count of inputs.
2951 EQUIV is the expression equivalent for the function.
2952 */
2953
2954 static void
2955 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
2956 int noperands, rtx equiv)
2957 {
2958 rtx usage = NULL_RTX, tmp, reg;
2959 int regno = 16, i;
2960
2961 start_sequence ();
2962
2963 for (i = 0; i < noperands; ++i)
2964 {
2965 switch (GET_MODE (operands[i]))
2966 {
2967 case TFmode:
2968 reg = gen_rtx_REG (TFmode, regno);
2969 regno += 2;
2970 break;
2971
2972 case DFmode:
2973 reg = gen_rtx_REG (DFmode, regno + 32);
2974 regno += 1;
2975 break;
2976
2977 case VOIDmode:
2978 gcc_assert (GET_CODE (operands[i]) == CONST_INT);
2979 /* FALLTHRU */
2980 case DImode:
2981 reg = gen_rtx_REG (DImode, regno);
2982 regno += 1;
2983 break;
2984
2985 default:
2986 gcc_unreachable ();
2987 }
2988
2989 emit_move_insn (reg, operands[i]);
2990 usage = alloc_EXPR_LIST (0, gen_rtx_USE (VOIDmode, reg), usage);
2991 }
2992
2993 switch (GET_MODE (target))
2994 {
2995 case TFmode:
2996 reg = gen_rtx_REG (TFmode, 16);
2997 break;
2998 case DFmode:
2999 reg = gen_rtx_REG (DFmode, 32);
3000 break;
3001 case DImode:
3002 reg = gen_rtx_REG (DImode, 0);
3003 break;
3004 default:
3005 gcc_unreachable ();
3006 }
3007
3008 tmp = gen_rtx_MEM (QImode, func);
3009 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
3010 const0_rtx, const0_rtx));
3011 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3012 CONST_OR_PURE_CALL_P (tmp) = 1;
3013
3014 tmp = get_insns ();
3015 end_sequence ();
3016
3017 emit_libcall_block (tmp, target, reg, equiv);
3018 }
3019
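/* Illustrative example (not from the original sources): for a TFmode
   add, the libcall emitted above passes operand 1 in $16/$17, operand 2
   in $18/$19, and the DImode rounding-mode argument in $20, then calls
   _OtsAddX (OTS$ADD_X on VMS) and reads the TFmode result back from
   $16/$17, with the whole sequence wrapped in a libcall block carrying
   the (plus:TF ...) equivalent.  */
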
3020 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
3021
3022 void
3023 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3024 {
3025 rtx func;
3026 int mode;
3027 rtx out_operands[3];
3028
3029 func = alpha_lookup_xfloating_lib_func (code);
3030 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3031
3032 out_operands[0] = operands[1];
3033 out_operands[1] = operands[2];
3034 out_operands[2] = GEN_INT (mode);
3035 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3036 gen_rtx_fmt_ee (code, TFmode, operands[1],
3037 operands[2]));
3038 }
3039
3040 /* Emit an X_floating library function call for a comparison. */
3041
3042 static rtx
3043 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3044 {
3045 enum rtx_code cmp_code, res_code;
3046 rtx func, out, operands[2];
3047
3048 /* X_floating library comparison functions return
3049 -1 unordered
3050 0 false
3051 1 true
3052 Convert the compare against the raw return value. */
3053
3054 cmp_code = *pcode;
3055 switch (cmp_code)
3056 {
3057 case UNORDERED:
3058 cmp_code = EQ;
3059 res_code = LT;
3060 break;
3061 case ORDERED:
3062 cmp_code = EQ;
3063 res_code = GE;
3064 break;
3065 case NE:
3066 res_code = NE;
3067 break;
3068 case EQ:
3069 case LT:
3070 case GT:
3071 case LE:
3072 case GE:
3073 res_code = GT;
3074 break;
3075 default:
3076 gcc_unreachable ();
3077 }
3078 *pcode = res_code;
3079
3080 func = alpha_lookup_xfloating_lib_func (cmp_code);
3081
3082 operands[0] = op0;
3083 operands[1] = op1;
3084 out = gen_reg_rtx (DImode);
3085
3086 /* ??? Strange mode for equiv because what's actually returned
3087 is -1,0,1, not a proper boolean value. */
3088 alpha_emit_xfloating_libcall (func, out, operands, 2,
3089 gen_rtx_fmt_ee (cmp_code, CCmode, op0, op1));
3090
3091 return out;
3092 }
3093
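/* Illustrative example (not from the original sources): a TFmode
   UNORDERED test calls the EQ routine (_OtsEqlX / OTS$EQL_X) and then
   tests the raw return value with LT, since only the unordered case
   yields -1.  Similarly, LE calls _OtsLeqX and the final test becomes
   "result GT 0", i.e. the routine returned 1 (true).  */
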
3094 /* Emit an X_floating library function call for a conversion. */
3095
3096 void
3097 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3098 {
3099 int noperands = 1, mode;
3100 rtx out_operands[2];
3101 rtx func;
3102 enum rtx_code code = orig_code;
3103
3104 if (code == UNSIGNED_FIX)
3105 code = FIX;
3106
3107 func = alpha_lookup_xfloating_lib_func (code);
3108
3109 out_operands[0] = operands[1];
3110
3111 switch (code)
3112 {
3113 case FIX:
3114 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3115 out_operands[1] = GEN_INT (mode);
3116 noperands = 2;
3117 break;
3118 case FLOAT_TRUNCATE:
3119 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3120 out_operands[1] = GEN_INT (mode);
3121 noperands = 2;
3122 break;
3123 default:
3124 break;
3125 }
3126
3127 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3128 gen_rtx_fmt_e (orig_code,
3129 GET_MODE (operands[0]),
3130 operands[1]));
3131 }
3132
3133 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3134 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3135 guarantee that the sequence
3136 set (OP[0] OP[2])
3137 set (OP[1] OP[3])
3138 is valid. Naturally, output operand ordering is little-endian.
3139 This is used by *movtf_internal and *movti_internal. */
3140
3141 void
3142 alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode,
3143 bool fixup_overlap)
3144 {
3145 switch (GET_CODE (operands[1]))
3146 {
3147 case REG:
3148 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3149 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3150 break;
3151
3152 case MEM:
3153 operands[3] = adjust_address (operands[1], DImode, 8);
3154 operands[2] = adjust_address (operands[1], DImode, 0);
3155 break;
3156
3157 case CONST_INT:
3158 case CONST_DOUBLE:
3159 gcc_assert (operands[1] == CONST0_RTX (mode));
3160 operands[2] = operands[3] = const0_rtx;
3161 break;
3162
3163 default:
3164 gcc_unreachable ();
3165 }
3166
3167 switch (GET_CODE (operands[0]))
3168 {
3169 case REG:
3170 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3171 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3172 break;
3173
3174 case MEM:
3175 operands[1] = adjust_address (operands[0], DImode, 8);
3176 operands[0] = adjust_address (operands[0], DImode, 0);
3177 break;
3178
3179 default:
3180 gcc_unreachable ();
3181 }
3182
3183 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3184 {
3185 rtx tmp;
3186 tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
3187 tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
3188 }
3189 }
3190
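/* Illustrative example (not from the original sources): splitting the
   TImode move (set (reg:TI $1) (mem:TI (reg $16))) yields, in
   little-endian operand order,

	OP[0] = (reg:DI $1)	OP[2] = (mem:DI (reg $16))
	OP[1] = (reg:DI $2)	OP[3] = (mem:DI 8 + (reg $16))

   and with FIXUP_OVERLAP set the two halves would be swapped if $1
   were needed to form the second memory address.  */
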
3191 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3192 op2 is a register containing the sign bit, operation is the
3193 logical operation to be performed. */
3194
3195 void
3196 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3197 {
3198 rtx high_bit = operands[2];
3199 rtx scratch;
3200 int move;
3201
3202 alpha_split_tmode_pair (operands, TFmode, false);
3203
3204 /* Detect three flavors of operand overlap. */
3205 move = 1;
3206 if (rtx_equal_p (operands[0], operands[2]))
3207 move = 0;
3208 else if (rtx_equal_p (operands[1], operands[2]))
3209 {
3210 if (rtx_equal_p (operands[0], high_bit))
3211 move = 2;
3212 else
3213 move = -1;
3214 }
3215
3216 if (move < 0)
3217 emit_move_insn (operands[0], operands[2]);
3218
3219 /* ??? If the destination overlaps both source tf and high_bit, then
3220 assume source tf is dead in its entirety and use the other half
3221 for a scratch register. Otherwise "scratch" is just the proper
3222 destination register. */
3223 scratch = operands[move < 2 ? 1 : 3];
3224
3225 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3226
3227 if (move > 0)
3228 {
3229 emit_move_insn (operands[0], operands[2]);
3230 if (move > 1)
3231 emit_move_insn (operands[1], scratch);
3232 }
3233 }
3234 \f
3235 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3236 unaligned data:
3237
3238 unsigned: signed:
3239 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3240 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3241 lda r3,X(r11) lda r3,X+2(r11)
3242 extwl r1,r3,r1 extql r1,r3,r1
3243 extwh r2,r3,r2 extqh r2,r3,r2
3244 or r1,r2,r1 or r1,r2,r1
3245 sra r1,48,r1
3246
3247 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3248 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3249 lda r3,X(r11) lda r3,X(r11)
3250 extll r1,r3,r1 extll r1,r3,r1
3251 extlh r2,r3,r2 extlh r2,r3,r2
3252 or r1,r2,r1 addl r1,r2,r1
3253
3254 quad: ldq_u r1,X(r11)
3255 ldq_u r2,X+7(r11)
3256 lda r3,X(r11)
3257 extql r1,r3,r1
3258 extqh r2,r3,r2
3259 or r1,r2,r1
3260 */
3261
3262 void
3263 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3264 HOST_WIDE_INT ofs, int sign)
3265 {
3266 rtx meml, memh, addr, extl, exth, tmp, mema;
3267 enum machine_mode mode;
3268
3269 if (TARGET_BWX && size == 2)
3270 {
3271 meml = adjust_address (mem, QImode, ofs);
3272 memh = adjust_address (mem, QImode, ofs+1);
3273 if (BYTES_BIG_ENDIAN)
3274 tmp = meml, meml = memh, memh = tmp;
3275 extl = gen_reg_rtx (DImode);
3276 exth = gen_reg_rtx (DImode);
3277 emit_insn (gen_zero_extendqidi2 (extl, meml));
3278 emit_insn (gen_zero_extendqidi2 (exth, memh));
3279 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3280 NULL, 1, OPTAB_LIB_WIDEN);
3281 addr = expand_simple_binop (DImode, IOR, extl, exth,
3282 NULL, 1, OPTAB_LIB_WIDEN);
3283
3284 if (sign && GET_MODE (tgt) != HImode)
3285 {
3286 addr = gen_lowpart (HImode, addr);
3287 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3288 }
3289 else
3290 {
3291 if (GET_MODE (tgt) != DImode)
3292 addr = gen_lowpart (GET_MODE (tgt), addr);
3293 emit_move_insn (tgt, addr);
3294 }
3295 return;
3296 }
3297
3298 meml = gen_reg_rtx (DImode);
3299 memh = gen_reg_rtx (DImode);
3300 addr = gen_reg_rtx (DImode);
3301 extl = gen_reg_rtx (DImode);
3302 exth = gen_reg_rtx (DImode);
3303
3304 mema = XEXP (mem, 0);
3305 if (GET_CODE (mema) == LO_SUM)
3306 mema = force_reg (Pmode, mema);
3307
3308 /* AND addresses cannot be in any alias set, since they may implicitly
3309 alias surrounding code. Ideally we'd have some alias set that
3310 covered all types except those with alignment 8 or higher. */
3311
3312 tmp = change_address (mem, DImode,
3313 gen_rtx_AND (DImode,
3314 plus_constant (mema, ofs),
3315 GEN_INT (-8)));
3316 set_mem_alias_set (tmp, 0);
3317 emit_move_insn (meml, tmp);
3318
3319 tmp = change_address (mem, DImode,
3320 gen_rtx_AND (DImode,
3321 plus_constant (mema, ofs + size - 1),
3322 GEN_INT (-8)));
3323 set_mem_alias_set (tmp, 0);
3324 emit_move_insn (memh, tmp);
3325
3326 if (WORDS_BIG_ENDIAN && sign && (size == 2 || size == 4))
3327 {
3328 emit_move_insn (addr, plus_constant (mema, -1));
3329
3330 emit_insn (gen_extqh_be (extl, meml, addr));
3331 emit_insn (gen_extxl_be (exth, memh, GEN_INT (64), addr));
3332
3333 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3334 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (64 - size*8),
3335 addr, 1, OPTAB_WIDEN);
3336 }
3337 else if (sign && size == 2)
3338 {
3339 emit_move_insn (addr, plus_constant (mema, ofs+2));
3340
3341 emit_insn (gen_extxl_le (extl, meml, GEN_INT (64), addr));
3342 emit_insn (gen_extqh_le (exth, memh, addr));
3343
3344 /* We must use tgt here for the target. Alpha-vms port fails if we use
3345 addr for the target, because addr is marked as a pointer and combine
3346 knows that pointers are always sign-extended 32-bit values. */
3347 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3348 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3349 addr, 1, OPTAB_WIDEN);
3350 }
3351 else
3352 {
3353 if (WORDS_BIG_ENDIAN)
3354 {
3355 emit_move_insn (addr, plus_constant (mema, ofs+size-1));
3356 switch ((int) size)
3357 {
3358 case 2:
3359 emit_insn (gen_extwh_be (extl, meml, addr));
3360 mode = HImode;
3361 break;
3362
3363 case 4:
3364 emit_insn (gen_extlh_be (extl, meml, addr));
3365 mode = SImode;
3366 break;
3367
3368 case 8:
3369 emit_insn (gen_extqh_be (extl, meml, addr));
3370 mode = DImode;
3371 break;
3372
3373 default:
3374 gcc_unreachable ();
3375 }
3376 emit_insn (gen_extxl_be (exth, memh, GEN_INT (size*8), addr));
3377 }
3378 else
3379 {
3380 emit_move_insn (addr, plus_constant (mema, ofs));
3381 emit_insn (gen_extxl_le (extl, meml, GEN_INT (size*8), addr));
3382 switch ((int) size)
3383 {
3384 case 2:
3385 emit_insn (gen_extwh_le (exth, memh, addr));
3386 mode = HImode;
3387 break;
3388
3389 case 4:
3390 emit_insn (gen_extlh_le (exth, memh, addr));
3391 mode = SImode;
3392 break;
3393
3394 case 8:
3395 emit_insn (gen_extqh_le (exth, memh, addr));
3396 mode = DImode;
3397 break;
3398
3399 default:
3400 gcc_unreachable ();
3401 }
3402 }
3403
3404 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3405 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3406 sign, OPTAB_WIDEN);
3407 }
3408
3409 if (addr != tgt)
3410 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3411 }
3412
3413 /* Similarly, use ins and msk instructions to perform unaligned stores. */
3414
3415 void
3416 alpha_expand_unaligned_store (rtx dst, rtx src,
3417 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3418 {
3419 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3420
3421 if (TARGET_BWX && size == 2)
3422 {
3423 if (src != const0_rtx)
3424 {
3425 dstl = gen_lowpart (QImode, src);
3426 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3427 NULL, 1, OPTAB_LIB_WIDEN);
3428 dsth = gen_lowpart (QImode, dsth);
3429 }
3430 else
3431 dstl = dsth = const0_rtx;
3432
3433 meml = adjust_address (dst, QImode, ofs);
3434 memh = adjust_address (dst, QImode, ofs+1);
3435 if (BYTES_BIG_ENDIAN)
3436 addr = meml, meml = memh, memh = addr;
3437
3438 emit_move_insn (meml, dstl);
3439 emit_move_insn (memh, dsth);
3440 return;
3441 }
3442
3443 dstl = gen_reg_rtx (DImode);
3444 dsth = gen_reg_rtx (DImode);
3445 insl = gen_reg_rtx (DImode);
3446 insh = gen_reg_rtx (DImode);
3447
3448 dsta = XEXP (dst, 0);
3449 if (GET_CODE (dsta) == LO_SUM)
3450 dsta = force_reg (Pmode, dsta);
3451
3452 /* AND addresses cannot be in any alias set, since they may implicitly
3453 alias surrounding code. Ideally we'd have some alias set that
3454 covered all types except those with alignment 8 or higher. */
3455
3456 meml = change_address (dst, DImode,
3457 gen_rtx_AND (DImode,
3458 plus_constant (dsta, ofs),
3459 GEN_INT (-8)));
3460 set_mem_alias_set (meml, 0);
3461
3462 memh = change_address (dst, DImode,
3463 gen_rtx_AND (DImode,
3464 plus_constant (dsta, ofs + size - 1),
3465 GEN_INT (-8)));
3466 set_mem_alias_set (memh, 0);
3467
3468 emit_move_insn (dsth, memh);
3469 emit_move_insn (dstl, meml);
3470 if (WORDS_BIG_ENDIAN)
3471 {
3472 addr = copy_addr_to_reg (plus_constant (dsta, ofs+size-1));
3473
3474 if (src != const0_rtx)
3475 {
3476 switch ((int) size)
3477 {
3478 case 2:
3479 emit_insn (gen_inswl_be (insh, gen_lowpart (HImode,src), addr));
3480 break;
3481 case 4:
3482 emit_insn (gen_insll_be (insh, gen_lowpart (SImode,src), addr));
3483 break;
3484 case 8:
3485 emit_insn (gen_insql_be (insh, gen_lowpart (DImode,src), addr));
3486 break;
3487 }
3488 emit_insn (gen_insxh (insl, gen_lowpart (DImode, src),
3489 GEN_INT (size*8), addr));
3490 }
3491
3492 switch ((int) size)
3493 {
3494 case 2:
3495 emit_insn (gen_mskxl_be (dsth, dsth, GEN_INT (0xffff), addr));
3496 break;
3497 case 4:
3498 {
3499 rtx msk = immed_double_const (0xffffffff, 0, DImode);
3500 emit_insn (gen_mskxl_be (dsth, dsth, msk, addr));
3501 break;
3502 }
3503 case 8:
3504 emit_insn (gen_mskxl_be (dsth, dsth, constm1_rtx, addr));
3505 break;
3506 }
3507
3508 emit_insn (gen_mskxh (dstl, dstl, GEN_INT (size*8), addr));
3509 }
3510 else
3511 {
3512 addr = copy_addr_to_reg (plus_constant (dsta, ofs));
3513
3514 if (src != CONST0_RTX (GET_MODE (src)))
3515 {
3516 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3517 GEN_INT (size*8), addr));
3518
3519 switch ((int) size)
3520 {
3521 case 2:
3522 emit_insn (gen_inswl_le (insl, gen_lowpart (HImode, src), addr));
3523 break;
3524 case 4:
3525 emit_insn (gen_insll_le (insl, gen_lowpart (SImode, src), addr));
3526 break;
3527 case 8:
3528 emit_insn (gen_insql_le (insl, src, addr));
3529 break;
3530 }
3531 }
3532
3533 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3534
3535 switch ((int) size)
3536 {
3537 case 2:
3538 emit_insn (gen_mskxl_le (dstl, dstl, GEN_INT (0xffff), addr));
3539 break;
3540 case 4:
3541 {
3542 rtx msk = immed_double_const (0xffffffff, 0, DImode);
3543 emit_insn (gen_mskxl_le (dstl, dstl, msk, addr));
3544 break;
3545 }
3546 case 8:
3547 emit_insn (gen_mskxl_le (dstl, dstl, constm1_rtx, addr));
3548 break;
3549 }
3550 }
3551
3552 if (src != CONST0_RTX (GET_MODE (src)))
3553 {
3554 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3555 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3556 }
3557
3558 if (WORDS_BIG_ENDIAN)
3559 {
3560 emit_move_insn (meml, dstl);
3561 emit_move_insn (memh, dsth);
3562 }
3563 else
3564 {
3565 /* Must store high before low for degenerate case of aligned. */
3566 emit_move_insn (memh, dsth);
3567 emit_move_insn (meml, dstl);
3568 }
3569 }
3570
3571 /* The block move code tries to maximize speed by separating loads and
3572 stores at the expense of register pressure: we load all of the data
3573 before we store it back out. There are two secondary effects worth
3574 mentioning, that this speeds copying to/from aligned and unaligned
3575 buffers, and that it makes the code significantly easier to write. */
3576
3577 #define MAX_MOVE_WORDS 8
3578
3579 /* Load an integral number of consecutive unaligned quadwords. */
3580
3581 static void
3582 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3583 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3584 {
3585 rtx const im8 = GEN_INT (-8);
3586 rtx const i64 = GEN_INT (64);
3587 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3588 rtx sreg, areg, tmp, smema;
3589 HOST_WIDE_INT i;
3590
3591 smema = XEXP (smem, 0);
3592 if (GET_CODE (smema) == LO_SUM)
3593 smema = force_reg (Pmode, smema);
3594
3595 /* Generate all the tmp registers we need. */
3596 for (i = 0; i < words; ++i)
3597 {
3598 data_regs[i] = out_regs[i];
3599 ext_tmps[i] = gen_reg_rtx (DImode);
3600 }
3601 data_regs[words] = gen_reg_rtx (DImode);
3602
3603 if (ofs != 0)
3604 smem = adjust_address (smem, GET_MODE (smem), ofs);
3605
3606 /* Load up all of the source data. */
3607 for (i = 0; i < words; ++i)
3608 {
3609 tmp = change_address (smem, DImode,
3610 gen_rtx_AND (DImode,
3611 plus_constant (smema, 8*i),
3612 im8));
3613 set_mem_alias_set (tmp, 0);
3614 emit_move_insn (data_regs[i], tmp);
3615 }
3616
3617 tmp = change_address (smem, DImode,
3618 gen_rtx_AND (DImode,
3619 plus_constant (smema, 8*words - 1),
3620 im8));
3621 set_mem_alias_set (tmp, 0);
3622 emit_move_insn (data_regs[words], tmp);
3623
3624 /* Extract the half-word fragments. Unfortunately DEC decided to make
3625 extxh with offset zero a noop instead of zeroing the register, so
3626 we must take care of that edge condition ourselves with cmov. */
3627
3628 sreg = copy_addr_to_reg (smema);
3629 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3630 1, OPTAB_WIDEN);
3631 if (WORDS_BIG_ENDIAN)
3632 emit_move_insn (sreg, plus_constant (sreg, 7));
3633 for (i = 0; i < words; ++i)
3634 {
3635 if (WORDS_BIG_ENDIAN)
3636 {
3637 emit_insn (gen_extqh_be (data_regs[i], data_regs[i], sreg));
3638 emit_insn (gen_extxl_be (ext_tmps[i], data_regs[i+1], i64, sreg));
3639 }
3640 else
3641 {
3642 emit_insn (gen_extxl_le (data_regs[i], data_regs[i], i64, sreg));
3643 emit_insn (gen_extqh_le (ext_tmps[i], data_regs[i+1], sreg));
3644 }
3645 emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i],
3646 gen_rtx_IF_THEN_ELSE (DImode,
3647 gen_rtx_EQ (DImode, areg,
3648 const0_rtx),
3649 const0_rtx, ext_tmps[i])));
3650 }
3651
3652 /* Merge the half-words into whole words. */
3653 for (i = 0; i < words; ++i)
3654 {
3655 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3656 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3657 }
3658 }
3659
3660 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
3661 may be NULL to store zeros. */
3662
3663 static void
3664 alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3665 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3666 {
3667 rtx const im8 = GEN_INT (-8);
3668 rtx const i64 = GEN_INT (64);
3669 rtx ins_tmps[MAX_MOVE_WORDS];
3670 rtx st_tmp_1, st_tmp_2, dreg;
3671 rtx st_addr_1, st_addr_2, dmema;
3672 HOST_WIDE_INT i;
3673
3674 dmema = XEXP (dmem, 0);
3675 if (GET_CODE (dmema) == LO_SUM)
3676 dmema = force_reg (Pmode, dmema);
3677
3678 /* Generate all the tmp registers we need. */
3679 if (data_regs != NULL)
3680 for (i = 0; i < words; ++i)
3681 ins_tmps[i] = gen_reg_rtx(DImode);
3682 st_tmp_1 = gen_reg_rtx(DImode);
3683 st_tmp_2 = gen_reg_rtx(DImode);
3684
3685 if (ofs != 0)
3686 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3687
3688 st_addr_2 = change_address (dmem, DImode,
3689 gen_rtx_AND (DImode,
3690 plus_constant (dmema, words*8 - 1),
3691 im8));
3692 set_mem_alias_set (st_addr_2, 0);
3693
3694 st_addr_1 = change_address (dmem, DImode,
3695 gen_rtx_AND (DImode, dmema, im8));
3696 set_mem_alias_set (st_addr_1, 0);
3697
3698 /* Load up the destination end bits. */
3699 emit_move_insn (st_tmp_2, st_addr_2);
3700 emit_move_insn (st_tmp_1, st_addr_1);
3701
3702 /* Shift the input data into place. */
3703 dreg = copy_addr_to_reg (dmema);
3704 if (WORDS_BIG_ENDIAN)
3705 emit_move_insn (dreg, plus_constant (dreg, 7));
3706 if (data_regs != NULL)
3707 {
3708 for (i = words-1; i >= 0; --i)
3709 {
3710 if (WORDS_BIG_ENDIAN)
3711 {
3712 emit_insn (gen_insql_be (ins_tmps[i], data_regs[i], dreg));
3713 emit_insn (gen_insxh (data_regs[i], data_regs[i], i64, dreg));
3714 }
3715 else
3716 {
3717 emit_insn (gen_insxh (ins_tmps[i], data_regs[i], i64, dreg));
3718 emit_insn (gen_insql_le (data_regs[i], data_regs[i], dreg));
3719 }
3720 }
3721 for (i = words-1; i > 0; --i)
3722 {
3723 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3724 ins_tmps[i-1], ins_tmps[i-1], 1,
3725 OPTAB_WIDEN);
3726 }
3727 }
3728
3729 /* Split and merge the ends with the destination data. */
3730 if (WORDS_BIG_ENDIAN)
3731 {
3732 emit_insn (gen_mskxl_be (st_tmp_2, st_tmp_2, constm1_rtx, dreg));
3733 emit_insn (gen_mskxh (st_tmp_1, st_tmp_1, i64, dreg));
3734 }
3735 else
3736 {
3737 emit_insn (gen_mskxh (st_tmp_2, st_tmp_2, i64, dreg));
3738 emit_insn (gen_mskxl_le (st_tmp_1, st_tmp_1, constm1_rtx, dreg));
3739 }
3740
3741 if (data_regs != NULL)
3742 {
3743 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3744 st_tmp_2, 1, OPTAB_WIDEN);
3745 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3746 st_tmp_1, 1, OPTAB_WIDEN);
3747 }
3748
3749 /* Store it all. */
3750 if (WORDS_BIG_ENDIAN)
3751 emit_move_insn (st_addr_1, st_tmp_1);
3752 else
3753 emit_move_insn (st_addr_2, st_tmp_2);
3754 for (i = words-1; i > 0; --i)
3755 {
3756 rtx tmp = change_address (dmem, DImode,
3757 gen_rtx_AND (DImode,
3758 plus_constant(dmema,
3759 WORDS_BIG_ENDIAN ? i*8-1 : i*8),
3760 im8));
3761 set_mem_alias_set (tmp, 0);
3762 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3763 }
3764 if (WORDS_BIG_ENDIAN)
3765 emit_move_insn (st_addr_2, st_tmp_2);
3766 else
3767 emit_move_insn (st_addr_1, st_tmp_1);
3768 }
3769
3770
3771 /* Expand string/block move operations.
3772
3773 operands[0] is the pointer to the destination.
3774 operands[1] is the pointer to the source.
3775 operands[2] is the number of bytes to move.
3776 operands[3] is the alignment. */
3777
3778 int
3779 alpha_expand_block_move (rtx operands[])
3780 {
3781 rtx bytes_rtx = operands[2];
3782 rtx align_rtx = operands[3];
3783 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3784 HOST_WIDE_INT bytes = orig_bytes;
3785 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3786 HOST_WIDE_INT dst_align = src_align;
3787 rtx orig_src = operands[1];
3788 rtx orig_dst = operands[0];
3789 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3790 rtx tmp;
3791 unsigned int i, words, ofs, nregs = 0;
3792
3793 if (orig_bytes <= 0)
3794 return 1;
3795 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3796 return 0;
3797
3798 /* Look for additional alignment information from recorded register info. */
3799
3800 tmp = XEXP (orig_src, 0);
3801 if (GET_CODE (tmp) == REG)
3802 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3803 else if (GET_CODE (tmp) == PLUS
3804 && GET_CODE (XEXP (tmp, 0)) == REG
3805 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
3806 {
3807 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3808 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3809
3810 if (a > src_align)
3811 {
3812 if (a >= 64 && c % 8 == 0)
3813 src_align = 64;
3814 else if (a >= 32 && c % 4 == 0)
3815 src_align = 32;
3816 else if (a >= 16 && c % 2 == 0)
3817 src_align = 16;
3818 }
3819 }
3820
3821 tmp = XEXP (orig_dst, 0);
3822 if (GET_CODE (tmp) == REG)
3823 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3824 else if (GET_CODE (tmp) == PLUS
3825 && GET_CODE (XEXP (tmp, 0)) == REG
3826 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
3827 {
3828 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3829 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3830
3831 if (a > dst_align)
3832 {
3833 if (a >= 64 && c % 8 == 0)
3834 dst_align = 64;
3835 else if (a >= 32 && c % 4 == 0)
3836 dst_align = 32;
3837 else if (a >= 16 && c % 2 == 0)
3838 dst_align = 16;
3839 }
3840 }
3841
3842 ofs = 0;
3843 if (src_align >= 64 && bytes >= 8)
3844 {
3845 words = bytes / 8;
3846
3847 for (i = 0; i < words; ++i)
3848 data_regs[nregs + i] = gen_reg_rtx (DImode);
3849
3850 for (i = 0; i < words; ++i)
3851 emit_move_insn (data_regs[nregs + i],
3852 adjust_address (orig_src, DImode, ofs + i * 8));
3853
3854 nregs += words;
3855 bytes -= words * 8;
3856 ofs += words * 8;
3857 }
3858
3859 if (src_align >= 32 && bytes >= 4)
3860 {
3861 words = bytes / 4;
3862
3863 for (i = 0; i < words; ++i)
3864 data_regs[nregs + i] = gen_reg_rtx (SImode);
3865
3866 for (i = 0; i < words; ++i)
3867 emit_move_insn (data_regs[nregs + i],
3868 adjust_address (orig_src, SImode, ofs + i * 4));
3869
3870 nregs += words;
3871 bytes -= words * 4;
3872 ofs += words * 4;
3873 }
3874
3875 if (bytes >= 8)
3876 {
3877 words = bytes / 8;
3878
3879 for (i = 0; i < words+1; ++i)
3880 data_regs[nregs + i] = gen_reg_rtx (DImode);
3881
3882 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3883 words, ofs);
3884
3885 nregs += words;
3886 bytes -= words * 8;
3887 ofs += words * 8;
3888 }
3889
3890 if (! TARGET_BWX && bytes >= 4)
3891 {
3892 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3893 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3894 bytes -= 4;
3895 ofs += 4;
3896 }
3897
3898 if (bytes >= 2)
3899 {
3900 if (src_align >= 16)
3901 {
3902 do {
3903 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3904 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3905 bytes -= 2;
3906 ofs += 2;
3907 } while (bytes >= 2);
3908 }
3909 else if (! TARGET_BWX)
3910 {
3911 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3912 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3913 bytes -= 2;
3914 ofs += 2;
3915 }
3916 }
3917
3918 while (bytes > 0)
3919 {
3920 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3921 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3922 bytes -= 1;
3923 ofs += 1;
3924 }
3925
3926 gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3927
3928 /* Now save it back out again. */
3929
3930 i = 0, ofs = 0;
3931
3932 /* Write out the data in whatever chunks reading the source allowed. */
3933 if (dst_align >= 64)
3934 {
3935 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3936 {
3937 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3938 data_regs[i]);
3939 ofs += 8;
3940 i++;
3941 }
3942 }
3943
3944 if (dst_align >= 32)
3945 {
3946 /* If the source has remaining DImode regs, write them out in
3947 two pieces. */
3948 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3949 {
3950 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3951 NULL_RTX, 1, OPTAB_WIDEN);
3952
3953 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3954 gen_lowpart (SImode, data_regs[i]));
3955 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3956 gen_lowpart (SImode, tmp));
3957 ofs += 8;
3958 i++;
3959 }
3960
3961 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3962 {
3963 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3964 data_regs[i]);
3965 ofs += 4;
3966 i++;
3967 }
3968 }
3969
3970 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3971 {
3972 /* Write out a remaining block of words using unaligned methods. */
3973
3974 for (words = 1; i + words < nregs; words++)
3975 if (GET_MODE (data_regs[i + words]) != DImode)
3976 break;
3977
3978 if (words == 1)
3979 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3980 else
3981 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3982 words, ofs);
3983
3984 i += words;
3985 ofs += words * 8;
3986 }
3987
3988 /* Due to the above, this won't be aligned. */
3989 /* ??? If we have more than one of these, consider constructing full
3990 words in registers and using alpha_expand_unaligned_store_words. */
3991 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3992 {
3993 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3994 ofs += 4;
3995 i++;
3996 }
3997
3998 if (dst_align >= 16)
3999 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4000 {
4001 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
4002 i++;
4003 ofs += 2;
4004 }
4005 else
4006 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4007 {
4008 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
4009 i++;
4010 ofs += 2;
4011 }
4012
4013 /* The remainder must be byte copies. */
4014 while (i < nregs)
4015 {
4016 gcc_assert (GET_MODE (data_regs[i]) == QImode);
4017 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
4018 i++;
4019 ofs += 1;
4020 }
4021
4022 return 1;
4023 }
4024
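/* Expand string/block clear operations.

   operands[0] is the pointer to the destination.
   operands[1] is the number of bytes to clear.
   operands[3] is the alignment.
   (operands[2], the value being stored, is presumably already known
   to be zero here; see the setmemqi expander.)  */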
4025 int
4026 alpha_expand_block_clear (rtx operands[])
4027 {
4028 rtx bytes_rtx = operands[1];
4029 rtx align_rtx = operands[3];
4030 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
4031 HOST_WIDE_INT bytes = orig_bytes;
4032 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
4033 HOST_WIDE_INT alignofs = 0;
4034 rtx orig_dst = operands[0];
4035 rtx tmp;
4036 int i, words, ofs = 0;
4037
4038 if (orig_bytes <= 0)
4039 return 1;
4040 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
4041 return 0;
4042
4043 /* Look for stricter alignment. */
4044 tmp = XEXP (orig_dst, 0);
4045 if (GET_CODE (tmp) == REG)
4046 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
4047 else if (GET_CODE (tmp) == PLUS
4048 && GET_CODE (XEXP (tmp, 0)) == REG
4049 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
4050 {
4051 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
4052 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
4053
4054 if (a > align)
4055 {
4056 if (a >= 64)
4057 align = a, alignofs = 8 - c % 8;
4058 else if (a >= 32)
4059 align = a, alignofs = 4 - c % 4;
4060 else if (a >= 16)
4061 align = a, alignofs = 2 - c % 2;
4062 }
4063 }
4064
4065 /* Handle an unaligned prefix first. */
4066
4067 if (alignofs > 0)
4068 {
4069 #if HOST_BITS_PER_WIDE_INT >= 64
4070 /* Given that alignofs is bounded by align, the only time BWX could
4071 generate three stores is for a 7 byte fill. Prefer two individual
4072 stores over a load/mask/store sequence. */
4073 if ((!TARGET_BWX || alignofs == 7)
4074 && align >= 32
4075 && !(alignofs == 4 && bytes >= 4))
4076 {
4077 enum machine_mode mode = (align >= 64 ? DImode : SImode);
4078 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
4079 rtx mem, tmp;
4080 HOST_WIDE_INT mask;
4081
4082 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
4083 set_mem_alias_set (mem, 0);
4084
4085 mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
4086 if (bytes < alignofs)
4087 {
4088 mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
4089 ofs += bytes;
4090 bytes = 0;
4091 }
4092 else
4093 {
4094 bytes -= alignofs;
4095 ofs += alignofs;
4096 }
4097 alignofs = 0;
4098
4099 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4100 NULL_RTX, 1, OPTAB_WIDEN);
4101
4102 emit_move_insn (mem, tmp);
4103 }
4104 #endif
4105
4106 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
4107 {
4108 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4109 bytes -= 1;
4110 ofs += 1;
4111 alignofs -= 1;
4112 }
4113 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
4114 {
4115 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
4116 bytes -= 2;
4117 ofs += 2;
4118 alignofs -= 2;
4119 }
4120 if (alignofs == 4 && bytes >= 4)
4121 {
4122 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4123 bytes -= 4;
4124 ofs += 4;
4125 alignofs = 0;
4126 }
4127
4128 /* If we've not used the extra lead alignment information by now,
4129 we won't be able to. Downgrade align to match what's left over. */
4130 if (alignofs > 0)
4131 {
4132 alignofs = alignofs & -alignofs;
4133 align = MIN (align, alignofs * BITS_PER_UNIT);
4134 }
4135 }
4136
4137 /* Handle a block of contiguous long-words. */
4138
4139 if (align >= 64 && bytes >= 8)
4140 {
4141 words = bytes / 8;
4142
4143 for (i = 0; i < words; ++i)
4144 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4145 const0_rtx);
4146
4147 bytes -= words * 8;
4148 ofs += words * 8;
4149 }
4150
4151 /* If the block is large and appropriately aligned, emit a single
4152 store followed by a sequence of stq_u insns. */
4153
4154 if (align >= 32 && bytes > 16)
4155 {
4156 rtx orig_dsta;
4157
4158 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4159 bytes -= 4;
4160 ofs += 4;
4161
4162 orig_dsta = XEXP (orig_dst, 0);
4163 if (GET_CODE (orig_dsta) == LO_SUM)
4164 orig_dsta = force_reg (Pmode, orig_dsta);
4165
4166 words = bytes / 8;
4167 for (i = 0; i < words; ++i)
4168 {
4169 rtx mem
4170 = change_address (orig_dst, DImode,
4171 gen_rtx_AND (DImode,
4172 plus_constant (orig_dsta, ofs + i*8),
4173 GEN_INT (-8)));
4174 set_mem_alias_set (mem, 0);
4175 emit_move_insn (mem, const0_rtx);
4176 }
4177
4178 /* Depending on the alignment, the first stq_u may have overlapped
4179 with the initial stl, which means that the last stq_u didn't
4180 write as much as it would appear. Leave those questionable bytes
4181 unaccounted for. */
4182 bytes -= words * 8 - 4;
4183 ofs += words * 8 - 4;
4184 }
4185
4186 /* Handle a smaller block of aligned words. */
4187
4188 if ((align >= 64 && bytes == 4)
4189 || (align == 32 && bytes >= 4))
4190 {
4191 words = bytes / 4;
4192
4193 for (i = 0; i < words; ++i)
4194 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4195 const0_rtx);
4196
4197 bytes -= words * 4;
4198 ofs += words * 4;
4199 }
4200
4201 /* An unaligned block uses stq_u stores for as many as possible. */
4202
4203 if (bytes >= 8)
4204 {
4205 words = bytes / 8;
4206
4207 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4208
4209 bytes -= words * 8;
4210 ofs += words * 8;
4211 }
4212
4213 /* Next clean up any trailing pieces. */
4214
4215 #if HOST_BITS_PER_WIDE_INT >= 64
4216 /* Count the number of bits in BYTES for which aligned stores could
4217 be emitted. */
4218 words = 0;
4219 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4220 if (bytes & i)
4221 words += 1;
4222
4223 /* If we have appropriate alignment (and it wouldn't take too many
4224 instructions otherwise), mask out the bytes we need. */
4225 if (TARGET_BWX ? words > 2 : bytes > 0)
4226 {
4227 if (align >= 64)
4228 {
4229 rtx mem, tmp;
4230 HOST_WIDE_INT mask;
4231
4232 mem = adjust_address (orig_dst, DImode, ofs);
4233 set_mem_alias_set (mem, 0);
4234
4235 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4236
4237 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4238 NULL_RTX, 1, OPTAB_WIDEN);
4239
4240 emit_move_insn (mem, tmp);
4241 return 1;
4242 }
4243 else if (align >= 32 && bytes < 4)
4244 {
4245 rtx mem, tmp;
4246 HOST_WIDE_INT mask;
4247
4248 mem = adjust_address (orig_dst, SImode, ofs);
4249 set_mem_alias_set (mem, 0);
4250
4251 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4252
4253 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4254 NULL_RTX, 1, OPTAB_WIDEN);
4255
4256 emit_move_insn (mem, tmp);
4257 return 1;
4258 }
4259 }
4260 #endif
4261
4262 if (!TARGET_BWX && bytes >= 4)
4263 {
4264 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4265 bytes -= 4;
4266 ofs += 4;
4267 }
4268
4269 if (bytes >= 2)
4270 {
4271 if (align >= 16)
4272 {
4273 do {
4274 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4275 const0_rtx);
4276 bytes -= 2;
4277 ofs += 2;
4278 } while (bytes >= 2);
4279 }
4280 else if (! TARGET_BWX)
4281 {
4282 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4283 bytes -= 2;
4284 ofs += 2;
4285 }
4286 }
4287
4288 while (bytes > 0)
4289 {
4290 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4291 bytes -= 1;
4292 ofs += 1;
4293 }
4294
4295 return 1;
4296 }
4297
4298 /* Returns a mask so that zap(x, value) == x & mask. */
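/* For example, VALUE == 0x0f selects the low four bytes for zapping,
   so the returned mask is 0xffffffff00000000: ANDing with it clears
   bytes 0-3 and keeps bytes 4-7.  */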
4299
4300 rtx
4301 alpha_expand_zap_mask (HOST_WIDE_INT value)
4302 {
4303 rtx result;
4304 int i;
4305
4306 if (HOST_BITS_PER_WIDE_INT >= 64)
4307 {
4308 HOST_WIDE_INT mask = 0;
4309
4310 for (i = 7; i >= 0; --i)
4311 {
4312 mask <<= 8;
4313 if (!((value >> i) & 1))
4314 mask |= 0xff;
4315 }
4316
4317 result = gen_int_mode (mask, DImode);
4318 }
4319 else
4320 {
4321 HOST_WIDE_INT mask_lo = 0, mask_hi = 0;
4322
4323 gcc_assert (HOST_BITS_PER_WIDE_INT == 32);
4324
4325 for (i = 7; i >= 4; --i)
4326 {
4327 mask_hi <<= 8;
4328 if (!((value >> i) & 1))
4329 mask_hi |= 0xff;
4330 }
4331
4332 for (i = 3; i >= 0; --i)
4333 {
4334 mask_lo <<= 8;
4335 if (!((value >> i) & 1))
4336 mask_lo |= 0xff;
4337 }
4338
4339 result = immed_double_const (mask_lo, mask_hi, DImode);
4340 }
4341
4342 return result;
4343 }
4344
4345 void
4346 alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4347 enum machine_mode mode,
4348 rtx op0, rtx op1, rtx op2)
4349 {
4350 op0 = gen_lowpart (mode, op0);
4351
4352 if (op1 == const0_rtx)
4353 op1 = CONST0_RTX (mode);
4354 else
4355 op1 = gen_lowpart (mode, op1);
4356
4357 if (op2 == const0_rtx)
4358 op2 = CONST0_RTX (mode);
4359 else
4360 op2 = gen_lowpart (mode, op2);
4361
4362 emit_insn ((*gen) (op0, op1, op2));
4363 }
4364
4365 /* A subroutine of the atomic operation splitters. Jump to LABEL if
4366 COND is true. Mark the jump as unlikely to be taken. */
4367
4368 static void
4369 emit_unlikely_jump (rtx cond, rtx label)
4370 {
4371 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
4372 rtx x;
4373
4374 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4375 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
4376 REG_NOTES (x) = gen_rtx_EXPR_LIST (REG_BR_PROB, very_unlikely, NULL_RTX);
4377 }
4378
4379 /* A subroutine of the atomic operation splitters. Emit a load-locked
4380 instruction in MODE. */
4381
4382 static void
4383 emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
4384 {
4385 rtx (*fn) (rtx, rtx) = NULL;
4386 if (mode == SImode)
4387 fn = gen_load_locked_si;
4388 else if (mode == DImode)
4389 fn = gen_load_locked_di;
4390 emit_insn (fn (reg, mem));
4391 }
4392
4393 /* A subroutine of the atomic operation splitters. Emit a store-conditional
4394 instruction in MODE. */
4395
4396 static void
4397 emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
4398 {
4399 rtx (*fn) (rtx, rtx, rtx) = NULL;
4400 if (mode == SImode)
4401 fn = gen_store_conditional_si;
4402 else if (mode == DImode)
4403 fn = gen_store_conditional_di;
4404 emit_insn (fn (res, mem, val));
4405 }
4406
4407 /* A subroutine of the atomic operation splitters. Emit an insxl
4408 instruction in MODE. */
4409
4410 static rtx
4411 emit_insxl (enum machine_mode mode, rtx op1, rtx op2)
4412 {
4413 rtx ret = gen_reg_rtx (DImode);
4414 rtx (*fn) (rtx, rtx, rtx);
4415
4416 if (WORDS_BIG_ENDIAN)
4417 {
4418 if (mode == QImode)
4419 fn = gen_insbl_be;
4420 else
4421 fn = gen_inswl_be;
4422 }
4423 else
4424 {
4425 if (mode == QImode)
4426 fn = gen_insbl_le;
4427 else
4428 fn = gen_inswl_le;
4429 }
4430 /* The insbl and inswl patterns require a register operand. */
4431 op1 = force_reg (mode, op1);
4432 emit_insn (fn (ret, op1, op2));
4433
4434 return ret;
4435 }
4436
4437 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
4438 to perform. MEM is the memory on which to operate. VAL is the second
4439 operand of the binary operator. BEFORE and AFTER are optional locations to
4440 return the value of MEM either before or after the operation. SCRATCH is
4441 a scratch register. */
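/* A rough sketch of the emitted sequence (DImode case, ignoring
   scheduling and exact register choice):

	mb
   1:	ldq_l	scratch,0(mem)
	<op>	scratch,val,scratch
	stq_c	scratch,0(mem)
	beq	scratch,1b
	mb

   BEFORE receives the loaded value and AFTER the operated-on value
   when the caller asked for them.  */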
4442
4443 void
4444 alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val,
4445 rtx before, rtx after, rtx scratch)
4446 {
4447 enum machine_mode mode = GET_MODE (mem);
4448 rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4449
4450 emit_insn (gen_memory_barrier ());
4451
4452 label = gen_label_rtx ();
4453 emit_label (label);
4454 label = gen_rtx_LABEL_REF (DImode, label);
4455
4456 if (before == NULL)
4457 before = scratch;
4458 emit_load_locked (mode, before, mem);
4459
4460 if (code == NOT)
4461 x = gen_rtx_AND (mode, gen_rtx_NOT (mode, before), val);
4462 else
4463 x = gen_rtx_fmt_ee (code, mode, before, val);
4464 if (after)
4465 emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x)));
4466 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
4467
4468 emit_store_conditional (mode, cond, mem, scratch);
4469
4470 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4471 emit_unlikely_jump (x, label);
4472
4473 emit_insn (gen_memory_barrier ());
4474 }
4475
4476 /* Expand a compare and swap operation. */
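/* A rough sketch of the emitted sequence (DImode case):

	mb
   1:	ldq_l	retval,0(mem)
	cmpeq	retval,oldval,cond
	beq	cond,2f
	mov	newval,scratch
	stq_c	scratch,0(mem)
	beq	scratch,1b
	mb
   2:

   The store is skipped entirely when the comparison fails; the locked
   sequence is retried when the store-conditional fails.  */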
4477
4478 void
4479 alpha_split_compare_and_swap (rtx retval, rtx mem, rtx oldval, rtx newval,
4480 rtx scratch)
4481 {
4482 enum machine_mode mode = GET_MODE (mem);
4483 rtx label1, label2, x, cond = gen_lowpart (DImode, scratch);
4484
4485 emit_insn (gen_memory_barrier ());
4486
4487 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4488 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4489 emit_label (XEXP (label1, 0));
4490
4491 emit_load_locked (mode, retval, mem);
4492
4493 x = gen_lowpart (DImode, retval);
4494 if (oldval == const0_rtx)
4495 x = gen_rtx_NE (DImode, x, const0_rtx);
4496 else
4497 {
4498 x = gen_rtx_EQ (DImode, x, oldval);
4499 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4500 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4501 }
4502 emit_unlikely_jump (x, label2);
4503
4504 emit_move_insn (scratch, newval);
4505 emit_store_conditional (mode, cond, mem, scratch);
4506
4507 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4508 emit_unlikely_jump (x, label1);
4509
4510 emit_insn (gen_memory_barrier ());
4511 emit_label (XEXP (label2, 0));
4512 }
4513
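/* Expand a compare and swap of a QImode or HImode value.  The narrow
   datum is handled within its containing aligned DImode word: the
   address is rounded down to a multiple of 8, NEWVAL is shifted into
   position with insbl/inswl, and the real work is done by the
   sync_compare_and_swap<mode>_1 splitter below.  */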
4514 void
4515 alpha_expand_compare_and_swap_12 (rtx dst, rtx mem, rtx oldval, rtx newval)
4516 {
4517 enum machine_mode mode = GET_MODE (mem);
4518 rtx addr, align, wdst;
4519 rtx (*fn5) (rtx, rtx, rtx, rtx, rtx);
4520
4521 addr = force_reg (DImode, XEXP (mem, 0));
4522 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4523 NULL_RTX, 1, OPTAB_DIRECT);
4524
4525 oldval = convert_modes (DImode, mode, oldval, 1);
4526 newval = emit_insxl (mode, newval, addr);
4527
4528 wdst = gen_reg_rtx (DImode);
4529 if (mode == QImode)
4530 fn5 = gen_sync_compare_and_swapqi_1;
4531 else
4532 fn5 = gen_sync_compare_and_swaphi_1;
4533 emit_insn (fn5 (wdst, addr, oldval, newval, align));
4534
4535 emit_move_insn (dst, gen_lowpart (mode, wdst));
4536 }
4537
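/* Split a narrow compare and swap: inside the ldq_l/stq_c loop, the
   old QImode/HImode value is pulled out of the containing word with
   ext[bw]l, compared against OLDVAL, and on a match the word is
   rebuilt with msk[bw]l/or from the pre-shifted NEWVAL before the
   store-conditional.  ALIGN is the rounded-down address computed by
   the expander above.  */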
4538 void
4539 alpha_split_compare_and_swap_12 (enum machine_mode mode, rtx dest, rtx addr,
4540 rtx oldval, rtx newval, rtx align,
4541 rtx scratch, rtx cond)
4542 {
4543 rtx label1, label2, mem, width, mask, x;
4544
4545 mem = gen_rtx_MEM (DImode, align);
4546 MEM_VOLATILE_P (mem) = 1;
4547
4548 emit_insn (gen_memory_barrier ());
4549 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4550 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4551 emit_label (XEXP (label1, 0));
4552
4553 emit_load_locked (DImode, scratch, mem);
4554
4555 width = GEN_INT (GET_MODE_BITSIZE (mode));
4556 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4557 if (WORDS_BIG_ENDIAN)
4558 emit_insn (gen_extxl_be (dest, scratch, width, addr));
4559 else
4560 emit_insn (gen_extxl_le (dest, scratch, width, addr));
4561
4562 if (oldval == const0_rtx)
4563 x = gen_rtx_NE (DImode, dest, const0_rtx);
4564 else
4565 {
4566 x = gen_rtx_EQ (DImode, dest, oldval);
4567 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4568 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4569 }
4570 emit_unlikely_jump (x, label2);
4571
4572 if (WORDS_BIG_ENDIAN)
4573 emit_insn (gen_mskxl_be (scratch, scratch, mask, addr));
4574 else
4575 emit_insn (gen_mskxl_le (scratch, scratch, mask, addr));
4576 emit_insn (gen_iordi3 (scratch, scratch, newval));
4577
4578 emit_store_conditional (DImode, scratch, mem, scratch);
4579
4580 x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4581 emit_unlikely_jump (x, label1);
4582
4583 emit_insn (gen_memory_barrier ());
4584 emit_label (XEXP (label2, 0));
4585 }
4586
4587 /* Expand an atomic exchange operation. */
4588
4589 void
4590 alpha_split_lock_test_and_set (rtx retval, rtx mem, rtx val, rtx scratch)
4591 {
4592 enum machine_mode mode = GET_MODE (mem);
4593 rtx label, x, cond = gen_lowpart (DImode, scratch);
4594
4595 emit_insn (gen_memory_barrier ());
4596
4597 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4598 emit_label (XEXP (label, 0));
4599
4600 emit_load_locked (mode, retval, mem);
4601 emit_move_insn (scratch, val);
4602 emit_store_conditional (mode, cond, mem, scratch);
4603
4604 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4605 emit_unlikely_jump (x, label);
4606 }
4607
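/* Expand an atomic exchange of a QImode or HImode value, using the
   same containing-word technique as the narrow compare and swap
   above.  */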
4608 void
4609 alpha_expand_lock_test_and_set_12 (rtx dst, rtx mem, rtx val)
4610 {
4611 enum machine_mode mode = GET_MODE (mem);
4612 rtx addr, align, wdst;
4613 rtx (*fn4) (rtx, rtx, rtx, rtx);
4614
4615 /* Force the address into a register. */
4616 addr = force_reg (DImode, XEXP (mem, 0));
4617
4618 /* Align it to a multiple of 8. */
4619 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4620 NULL_RTX, 1, OPTAB_DIRECT);
4621
4622 /* Insert val into the correct byte location within the word. */
4623 val = emit_insxl (mode, val, addr);
4624
4625 wdst = gen_reg_rtx (DImode);
4626 if (mode == QImode)
4627 fn4 = gen_sync_lock_test_and_setqi_1;
4628 else
4629 fn4 = gen_sync_lock_test_and_sethi_1;
4630 emit_insn (fn4 (wdst, addr, val, align));
4631
4632 emit_move_insn (dst, gen_lowpart (mode, wdst));
4633 }
4634
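/* Split a narrow atomic exchange: within the ldq_l/stq_c loop, the old
   value is extracted into DEST and the pre-shifted VAL is merged into
   the containing word with msk[bw]l/or before it is stored back.  */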
4635 void
4636 alpha_split_lock_test_and_set_12 (enum machine_mode mode, rtx dest, rtx addr,
4637 rtx val, rtx align, rtx scratch)
4638 {
4639 rtx label, mem, width, mask, x;
4640
4641 mem = gen_rtx_MEM (DImode, align);
4642 MEM_VOLATILE_P (mem) = 1;
4643
4644 emit_insn (gen_memory_barrier ());
4645 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4646 emit_label (XEXP (label, 0));
4647
4648 emit_load_locked (DImode, scratch, mem);
4649
4650 width = GEN_INT (GET_MODE_BITSIZE (mode));
4651 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4652 if (WORDS_BIG_ENDIAN)
4653 {
4654 emit_insn (gen_extxl_be (dest, scratch, width, addr));
4655 emit_insn (gen_mskxl_be (scratch, scratch, mask, addr));
4656 }
4657 else
4658 {
4659 emit_insn (gen_extxl_le (dest, scratch, width, addr));
4660 emit_insn (gen_mskxl_le (scratch, scratch, mask, addr));
4661 }
4662 emit_insn (gen_iordi3 (scratch, scratch, val));
4663
4664 emit_store_conditional (DImode, scratch, mem, scratch);
4665
4666 x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4667 emit_unlikely_jump (x, label);
4668 }
4669 \f
4670 /* Adjust the cost of a scheduling dependency. Return the new cost of
4671 the dependency LINK of INSN on DEP_INSN. COST is the current cost. */
4672
4673 static int
4674 alpha_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4675 {
4676 enum attr_type insn_type, dep_insn_type;
4677
4678 /* If the dependence is an anti-dependence, there is no cost. For an
4679 output dependence, there is sometimes a cost, but it doesn't seem
4680 worth handling those few cases. */
4681 if (REG_NOTE_KIND (link) != 0)
4682 return cost;
4683
4684 /* If we can't recognize the insns, we can't really do anything. */
4685 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4686 return cost;
4687
4688 insn_type = get_attr_type (insn);
4689 dep_insn_type = get_attr_type (dep_insn);
4690
4691 /* Bring in the user-defined memory latency. */
4692 if (dep_insn_type == TYPE_ILD
4693 || dep_insn_type == TYPE_FLD
4694 || dep_insn_type == TYPE_LDSYM)
4695 cost += alpha_memory_latency-1;
4696
4697 /* Everything else handled in DFA bypasses now. */
4698
4699 return cost;
4700 }
4701
4702 /* The number of instructions that can be issued per cycle. */
4703
4704 static int
4705 alpha_issue_rate (void)
4706 {
4707 return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4708 }
4709
4710 /* How many alternative schedules to try. This should be as wide as the
4711 scheduling freedom in the DFA, but no wider. Making this value too
4712 large results in extra work for the scheduler.
4713
4714 For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4715 alternative schedules. For EV5, we can choose between E0/E1 and
4716 FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
4717
4718 static int
4719 alpha_multipass_dfa_lookahead (void)
4720 {
4721 return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4722 }
4723 \f
4724 /* Machine-specific function data. */
4725
4726 struct machine_function GTY(())
4727 {
4728 /* For unicosmk. */
4729 /* List of call information words for calls from this function. */
4730 struct rtx_def *first_ciw;
4731 struct rtx_def *last_ciw;
4732 int ciw_count;
4733
4734 /* List of deferred case vectors. */
4735 struct rtx_def *addr_list;
4736
4737 /* For OSF. */
4738 const char *some_ld_name;
4739
4740 /* For TARGET_LD_BUGGY_LDGP. */
4741 struct rtx_def *gp_save_rtx;
4742 };
4743
4744 /* How to allocate a 'struct machine_function'. */
4745
4746 static struct machine_function *
4747 alpha_init_machine_status (void)
4748 {
4749 return ((struct machine_function *)
4750 ggc_alloc_cleared (sizeof (struct machine_function)));
4751 }
4752
4753 /* Functions to save and restore alpha_return_addr_rtx. */
4754
4755 /* Start the ball rolling with RETURN_ADDR_RTX. */
4756
4757 rtx
4758 alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4759 {
4760 if (count != 0)
4761 return const0_rtx;
4762
4763 return get_hard_reg_initial_val (Pmode, REG_RA);
4764 }
4765
4766 /* Return or create a memory slot containing the gp value for the current
4767 function. Needed only if TARGET_LD_BUGGY_LDGP. */
4768
4769 rtx
4770 alpha_gp_save_rtx (void)
4771 {
4772 rtx seq, m = cfun->machine->gp_save_rtx;
4773
4774 if (m == NULL)
4775 {
4776 start_sequence ();
4777
4778 m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4779 m = validize_mem (m);
4780 emit_move_insn (m, pic_offset_table_rtx);
4781
4782 seq = get_insns ();
4783 end_sequence ();
4784 emit_insn_at_entry (seq);
4785
4786 cfun->machine->gp_save_rtx = m;
4787 }
4788
4789 return m;
4790 }
4791
4792 static int
4793 alpha_ra_ever_killed (void)
4794 {
4795 rtx top;
4796
4797 if (!has_hard_reg_initial_val (Pmode, REG_RA))
4798 return regs_ever_live[REG_RA];
4799
4800 push_topmost_sequence ();
4801 top = get_insns ();
4802 pop_topmost_sequence ();
4803
4804 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX);
4805 }
4806
4807 \f
4808 /* Return the trap mode suffix applicable to the current
4809 instruction, or NULL. */
4810
4811 static const char *
4812 get_trap_mode_suffix (void)
4813 {
4814 enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
4815
4816 switch (s)
4817 {
4818 case TRAP_SUFFIX_NONE:
4819 return NULL;
4820
4821 case TRAP_SUFFIX_SU:
4822 if (alpha_fptm >= ALPHA_FPTM_SU)
4823 return "su";
4824 return NULL;
4825
4826 case TRAP_SUFFIX_SUI:
4827 if (alpha_fptm >= ALPHA_FPTM_SUI)
4828 return "sui";
4829 return NULL;
4830
4831 case TRAP_SUFFIX_V_SV:
4832 switch (alpha_fptm)
4833 {
4834 case ALPHA_FPTM_N:
4835 return NULL;
4836 case ALPHA_FPTM_U:
4837 return "v";
4838 case ALPHA_FPTM_SU:
4839 case ALPHA_FPTM_SUI:
4840 return "sv";
4841 default:
4842 gcc_unreachable ();
4843 }
4844
4845 case TRAP_SUFFIX_V_SV_SVI:
4846 switch (alpha_fptm)
4847 {
4848 case ALPHA_FPTM_N:
4849 return NULL;
4850 case ALPHA_FPTM_U:
4851 return "v";
4852 case ALPHA_FPTM_SU:
4853 return "sv";
4854 case ALPHA_FPTM_SUI:
4855 return "svi";
4856 default:
4857 gcc_unreachable ();
4858 }
4859 break;
4860
4861 case TRAP_SUFFIX_U_SU_SUI:
4862 switch (alpha_fptm)
4863 {
4864 case ALPHA_FPTM_N:
4865 return NULL;
4866 case ALPHA_FPTM_U:
4867 return "u";
4868 case ALPHA_FPTM_SU:
4869 return "su";
4870 case ALPHA_FPTM_SUI:
4871 return "sui";
4872 default:
4873 gcc_unreachable ();
4874 }
4875 break;
4876
4877 default:
4878 gcc_unreachable ();
4879 }
4880 gcc_unreachable ();
4881 }
4882
4883 /* Return the rounding mode suffix applicable to the current
4884 instruction, or NULL. */
4885
4886 static const char *
4887 get_round_mode_suffix (void)
4888 {
4889 enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
4890
4891 switch (s)
4892 {
4893 case ROUND_SUFFIX_NONE:
4894 return NULL;
4895 case ROUND_SUFFIX_NORMAL:
4896 switch (alpha_fprm)
4897 {
4898 case ALPHA_FPRM_NORM:
4899 return NULL;
4900 case ALPHA_FPRM_MINF:
4901 return "m";
4902 case ALPHA_FPRM_CHOP:
4903 return "c";
4904 case ALPHA_FPRM_DYN:
4905 return "d";
4906 default:
4907 gcc_unreachable ();
4908 }
4909 break;
4910
4911 case ROUND_SUFFIX_C:
4912 return "c";
4913
4914 default:
4915 gcc_unreachable ();
4916 }
4917 gcc_unreachable ();
4918 }
4919
4920 /* Locate some local-dynamic symbol still in use by this function
4921 so that we can print its name in some movdi_er_tlsldm pattern. */
4922
4923 static int
4924 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
4925 {
4926 rtx x = *px;
4927
4928 if (GET_CODE (x) == SYMBOL_REF
4929 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
4930 {
4931 cfun->machine->some_ld_name = XSTR (x, 0);
4932 return 1;
4933 }
4934
4935 return 0;
4936 }
4937
4938 static const char *
4939 get_some_local_dynamic_name (void)
4940 {
4941 rtx insn;
4942
4943 if (cfun->machine->some_ld_name)
4944 return cfun->machine->some_ld_name;
4945
4946 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
4947 if (INSN_P (insn)
4948 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
4949 return cfun->machine->some_ld_name;
4950
4951 gcc_unreachable ();
4952 }
4953
4954 /* Print an operand. Recognize special options, documented below. */
4955
4956 void
4957 print_operand (FILE *file, rtx x, int code)
4958 {
4959 int i;
4960
4961 switch (code)
4962 {
4963 case '~':
4964 /* Print the assembler name of the current function. */
4965 assemble_name (file, alpha_fnname);
4966 break;
4967
4968 case '&':
4969 assemble_name (file, get_some_local_dynamic_name ());
4970 break;
4971
4972 case '/':
4973 {
4974 const char *trap = get_trap_mode_suffix ();
4975 const char *round = get_round_mode_suffix ();
4976
4977 if (trap || round)
4978 fprintf (file, (TARGET_AS_SLASH_BEFORE_SUFFIX ? "/%s%s" : "%s%s"),
4979 (trap ? trap : ""), (round ? round : ""));
4980 break;
4981 }
4982
4983 case ',':
4984 /* Generates single precision instruction suffix. */
4985 fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
4986 break;
4987
4988 case '-':
4989 /* Generates double precision instruction suffix. */
4990 fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
4991 break;
4992
4993 case '+':
4994 /* Generates a nop after a noreturn call at the very end of the
4995 function. */
4996 if (next_real_insn (current_output_insn) == 0)
4997 fprintf (file, "\n\tnop");
4998 break;
4999
5000 case '#':
5001 if (alpha_this_literal_sequence_number == 0)
5002 alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5003 fprintf (file, "%d", alpha_this_literal_sequence_number);
5004 break;
5005
5006 case '*':
5007 if (alpha_this_gpdisp_sequence_number == 0)
5008 alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5009 fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5010 break;
5011
5012 case 'H':
5013 if (GET_CODE (x) == HIGH)
5014 output_addr_const (file, XEXP (x, 0));
5015 else
5016 output_operand_lossage ("invalid %%H value");
5017 break;
5018
5019 case 'J':
5020 {
5021 const char *lituse;
5022
5023 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5024 {
5025 x = XVECEXP (x, 0, 0);
5026 lituse = "lituse_tlsgd";
5027 }
5028 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5029 {
5030 x = XVECEXP (x, 0, 0);
5031 lituse = "lituse_tlsldm";
5032 }
5033 else if (GET_CODE (x) == CONST_INT)
5034 lituse = "lituse_jsr";
5035 else
5036 {
5037 output_operand_lossage ("invalid %%J value");
5038 break;
5039 }
5040
5041 if (x != const0_rtx)
5042 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5043 }
5044 break;
5045
5046 case 'j':
5047 {
5048 const char *lituse;
5049
5050 #ifdef HAVE_AS_JSRDIRECT_RELOCS
5051 lituse = "lituse_jsrdirect";
5052 #else
5053 lituse = "lituse_jsr";
5054 #endif
5055
5056 gcc_assert (INTVAL (x) != 0);
5057 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5058 }
5059 break;
5060 case 'r':
5061 /* If this operand is the constant zero, write it as "$31". */
5062 if (GET_CODE (x) == REG)
5063 fprintf (file, "%s", reg_names[REGNO (x)]);
5064 else if (x == CONST0_RTX (GET_MODE (x)))
5065 fprintf (file, "$31");
5066 else
5067 output_operand_lossage ("invalid %%r value");
5068 break;
5069
5070 case 'R':
5071 /* Similar, but for floating-point. */
5072 if (GET_CODE (x) == REG)
5073 fprintf (file, "%s", reg_names[REGNO (x)]);
5074 else if (x == CONST0_RTX (GET_MODE (x)))
5075 fprintf (file, "$f31");
5076 else
5077 output_operand_lossage ("invalid %%R value");
5078 break;
5079
5080 case 'N':
5081 /* Write the 1's complement of a constant. */
5082 if (GET_CODE (x) != CONST_INT)
5083 output_operand_lossage ("invalid %%N value");
5084
5085 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5086 break;
5087
5088 case 'P':
5089 /* Write 1 << C, for a constant C. */
5090 if (GET_CODE (x) != CONST_INT)
5091 output_operand_lossage ("invalid %%P value");
5092
5093 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x));
5094 break;
5095
5096 case 'h':
5097 /* Write the high-order 16 bits of a constant, sign-extended. */
5098 if (GET_CODE (x) != CONST_INT)
5099 output_operand_lossage ("invalid %%h value");
5100
5101 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5102 break;
5103
5104 case 'L':
5105 /* Write the low-order 16 bits of a constant, sign-extended. */
5106 if (GET_CODE (x) != CONST_INT)
5107 output_operand_lossage ("invalid %%L value");
5108
5109 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5110 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5111 break;
5112
5113 case 'm':
5114 /* Write mask for ZAP insn. */
5115 if (GET_CODE (x) == CONST_DOUBLE)
5116 {
5117 HOST_WIDE_INT mask = 0;
5118 HOST_WIDE_INT value;
5119
5120 value = CONST_DOUBLE_LOW (x);
5121 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5122 i++, value >>= 8)
5123 if (value & 0xff)
5124 mask |= (1 << i);
5125
5126 value = CONST_DOUBLE_HIGH (x);
5127 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5128 i++, value >>= 8)
5129 if (value & 0xff)
5130 mask |= (1 << (i + sizeof (int)));
5131
5132 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff);
5133 }
5134
5135 else if (GET_CODE (x) == CONST_INT)
5136 {
5137 HOST_WIDE_INT mask = 0, value = INTVAL (x);
5138
5139 for (i = 0; i < 8; i++, value >>= 8)
5140 if (value & 0xff)
5141 mask |= (1 << i);
5142
5143 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5144 }
5145 else
5146 output_operand_lossage ("invalid %%m value");
5147 break;
5148
5149 case 'M':
5150 /* 'b', 'w', 'l', or 'q' as the value of the constant. */
5151 if (GET_CODE (x) != CONST_INT
5152 || (INTVAL (x) != 8 && INTVAL (x) != 16
5153 && INTVAL (x) != 32 && INTVAL (x) != 64))
5154 output_operand_lossage ("invalid %%M value");
5155
5156 fprintf (file, "%s",
5157 (INTVAL (x) == 8 ? "b"
5158 : INTVAL (x) == 16 ? "w"
5159 : INTVAL (x) == 32 ? "l"
5160 : "q"));
5161 break;
5162
5163 case 'U':
5164 /* Similar, except do it from the mask. */
5165 if (GET_CODE (x) == CONST_INT)
5166 {
5167 HOST_WIDE_INT value = INTVAL (x);
5168
5169 if (value == 0xff)
5170 {
5171 fputc ('b', file);
5172 break;
5173 }
5174 if (value == 0xffff)
5175 {
5176 fputc ('w', file);
5177 break;
5178 }
5179 if (value == 0xffffffff)
5180 {
5181 fputc ('l', file);
5182 break;
5183 }
5184 if (value == -1)
5185 {
5186 fputc ('q', file);
5187 break;
5188 }
5189 }
5190 else if (HOST_BITS_PER_WIDE_INT == 32
5191 && GET_CODE (x) == CONST_DOUBLE
5192 && CONST_DOUBLE_LOW (x) == 0xffffffff
5193 && CONST_DOUBLE_HIGH (x) == 0)
5194 {
5195 fputc ('l', file);
5196 break;
5197 }
5198 output_operand_lossage ("invalid %%U value");
5199 break;
5200
5201 case 's':
5202 /* Write the constant value divided by 8 for little-endian mode or
5203 (56 - value) / 8 for big-endian mode. */
5204
5205 if (GET_CODE (x) != CONST_INT
5206 || (unsigned HOST_WIDE_INT) INTVAL (x) >= (WORDS_BIG_ENDIAN
5207 ? 56
5208 : 64)
5209 || (INTVAL (x) & 7) != 0)
5210 output_operand_lossage ("invalid %%s value");
5211
5212 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5213 WORDS_BIG_ENDIAN
5214 ? (56 - INTVAL (x)) / 8
5215 : INTVAL (x) / 8);
5216 break;
5217
5218 case 'S':
5219 /* Same, except compute (64 - c) / 8 */
5220
5221 if (GET_CODE (x) != CONST_INT
5222 && (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5223 && (INTVAL (x) & 7) != 8)
5224 output_operand_lossage ("invalid %%S value");
5225
5226 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
5227 break;
5228
5229 case 't':
5230 {
5231 /* On Unicos/Mk systems: use a DEX expression if the symbol
5232 clashes with a register name. */
5233 int dex = unicosmk_need_dex (x);
5234 if (dex)
5235 fprintf (file, "DEX(%d)", dex);
5236 else
5237 output_addr_const (file, x);
5238 }
5239 break;
5240
5241 case 'C': case 'D': case 'c': case 'd':
5242 /* Write out comparison name. */
5243 {
5244 enum rtx_code c = GET_CODE (x);
5245
5246 if (!COMPARISON_P (x))
5247 output_operand_lossage ("invalid %%C value");
5248
5249 else if (code == 'D')
5250 c = reverse_condition (c);
5251 else if (code == 'c')
5252 c = swap_condition (c);
5253 else if (code == 'd')
5254 c = swap_condition (reverse_condition (c));
5255
5256 if (c == LEU)
5257 fprintf (file, "ule");
5258 else if (c == LTU)
5259 fprintf (file, "ult");
5260 else if (c == UNORDERED)
5261 fprintf (file, "un");
5262 else
5263 fprintf (file, "%s", GET_RTX_NAME (c));
5264 }
5265 break;
5266
5267 case 'E':
5268 /* Write the divide or modulus operator. */
5269 switch (GET_CODE (x))
5270 {
5271 case DIV:
5272 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5273 break;
5274 case UDIV:
5275 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5276 break;
5277 case MOD:
5278 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5279 break;
5280 case UMOD:
5281 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5282 break;
5283 default:
5284 output_operand_lossage ("invalid %%E value");
5285 break;
5286 }
5287 break;
5288
5289 case 'A':
5290 /* Write "_u" for unaligned access. */
5291 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == AND)
5292 fprintf (file, "_u");
5293 break;
5294
5295 case 0:
5296 if (GET_CODE (x) == REG)
5297 fprintf (file, "%s", reg_names[REGNO (x)]);
5298 else if (GET_CODE (x) == MEM)
5299 output_address (XEXP (x, 0));
5300 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5301 {
5302 switch (XINT (XEXP (x, 0), 1))
5303 {
5304 case UNSPEC_DTPREL:
5305 case UNSPEC_TPREL:
5306 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5307 break;
5308 default:
5309 output_operand_lossage ("unknown relocation unspec");
5310 break;
5311 }
5312 }
5313 else
5314 output_addr_const (file, x);
5315 break;
5316
5317 default:
5318 output_operand_lossage ("invalid %%xn code");
5319 }
5320 }
5321
5322 void
5323 print_operand_address (FILE *file, rtx addr)
5324 {
5325 int basereg = 31;
5326 HOST_WIDE_INT offset = 0;
5327
5328 if (GET_CODE (addr) == AND)
5329 addr = XEXP (addr, 0);
5330
5331 if (GET_CODE (addr) == PLUS
5332 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
5333 {
5334 offset = INTVAL (XEXP (addr, 1));
5335 addr = XEXP (addr, 0);
5336 }
5337
5338 if (GET_CODE (addr) == LO_SUM)
5339 {
5340 const char *reloc16, *reloclo;
5341 rtx op1 = XEXP (addr, 1);
5342
5343 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5344 {
5345 op1 = XEXP (op1, 0);
5346 switch (XINT (op1, 1))
5347 {
5348 case UNSPEC_DTPREL:
5349 reloc16 = NULL;
5350 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5351 break;
5352 case UNSPEC_TPREL:
5353 reloc16 = NULL;
5354 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5355 break;
5356 default:
5357 output_operand_lossage ("unknown relocation unspec");
5358 return;
5359 }
5360
5361 output_addr_const (file, XVECEXP (op1, 0, 0));
5362 }
5363 else
5364 {
5365 reloc16 = "gprel";
5366 reloclo = "gprellow";
5367 output_addr_const (file, op1);
5368 }
5369
5370 if (offset)
5371 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5372
5373 addr = XEXP (addr, 0);
5374 switch (GET_CODE (addr))
5375 {
5376 case REG:
5377 basereg = REGNO (addr);
5378 break;
5379
5380 case SUBREG:
5381 basereg = subreg_regno (addr);
5382 break;
5383
5384 default:
5385 gcc_unreachable ();
5386 }
5387
5388 fprintf (file, "($%d)\t\t!%s", basereg,
5389 (basereg == 29 ? reloc16 : reloclo));
5390 return;
5391 }
5392
5393 switch (GET_CODE (addr))
5394 {
5395 case REG:
5396 basereg = REGNO (addr);
5397 break;
5398
5399 case SUBREG:
5400 basereg = subreg_regno (addr);
5401 break;
5402
5403 case CONST_INT:
5404 offset = INTVAL (addr);
5405 break;
5406
5407 #if TARGET_ABI_OPEN_VMS
5408 case SYMBOL_REF:
5409 fprintf (file, "%s", XSTR (addr, 0));
5410 return;
5411
5412 case CONST:
5413 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5414 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5415 fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5416 XSTR (XEXP (XEXP (addr, 0), 0), 0),
5417 INTVAL (XEXP (XEXP (addr, 0), 1)));
5418 return;
5419
5420 #endif
5421 default:
5422 gcc_unreachable ();
5423 }
5424
5425 fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5426 }
5427 \f
5428 /* Emit RTL insns to initialize the variable parts of a trampoline at
5429 TRAMP. FNADDR is an RTX for the address of the function's pure
5430 code. CXT is an RTX for the static chain value for the function.
5431
5432 The three offset parameters are for the individual template's
5433 layout. A JMPOFS < 0 indicates that the trampoline does not
5434 contain instructions at all.
5435
5436 We assume here that a function will be called many more times than
5437 its address is taken (e.g., it might be passed to qsort), so we
5438 take the trouble to initialize the "hint" field in the JMP insn.
5439 Note that the hint field is PC (new) + 4 * bits 13:0. */
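/* A worked example of the hint formula above (hypothetical addresses):
   with the JMP at 0x1000 and the function's code at 0x1200, the hint
   written would be ((0x1200 - 0x1004) >> 2) & 0x3fff == 0x7f, i.e. the
   displacement in instructions from the insn following the JMP.  */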
5440
5441 void
5442 alpha_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt,
5443 int fnofs, int cxtofs, int jmpofs)
5444 {
5445 rtx temp, temp1, addr;
5446 /* VMS really uses DImode pointers in memory at this point. */
5447 enum machine_mode mode = TARGET_ABI_OPEN_VMS ? Pmode : ptr_mode;
5448
5449 #ifdef POINTERS_EXTEND_UNSIGNED
5450 fnaddr = convert_memory_address (mode, fnaddr);
5451 cxt = convert_memory_address (mode, cxt);
5452 #endif
5453
5454 /* Store function address and CXT. */
5455 addr = memory_address (mode, plus_constant (tramp, fnofs));
5456 emit_move_insn (gen_rtx_MEM (mode, addr), fnaddr);
5457 addr = memory_address (mode, plus_constant (tramp, cxtofs));
5458 emit_move_insn (gen_rtx_MEM (mode, addr), cxt);
5459
5460 /* This has been disabled since the hint only has a 32k range, and in
5461 no existing OS is the stack within 32k of the text segment. */
5462 if (0 && jmpofs >= 0)
5463 {
5464 /* Compute hint value. */
5465 temp = force_operand (plus_constant (tramp, jmpofs+4), NULL_RTX);
5466 temp = expand_binop (DImode, sub_optab, fnaddr, temp, temp, 1,
5467 OPTAB_WIDEN);
5468 temp = expand_shift (RSHIFT_EXPR, Pmode, temp,
5469 build_int_cst (NULL_TREE, 2), NULL_RTX, 1);
5470 temp = expand_and (SImode, gen_lowpart (SImode, temp),
5471 GEN_INT (0x3fff), 0);
5472
5473 /* Merge in the hint. */
5474 addr = memory_address (SImode, plus_constant (tramp, jmpofs));
5475 temp1 = force_reg (SImode, gen_rtx_MEM (SImode, addr));
5476 temp1 = expand_and (SImode, temp1, GEN_INT (0xffffc000), NULL_RTX);
5477 temp1 = expand_binop (SImode, ior_optab, temp1, temp, temp1, 1,
5478 OPTAB_WIDEN);
5479 emit_move_insn (gen_rtx_MEM (SImode, addr), temp1);
5480 }
5481
5482 #ifdef ENABLE_EXECUTE_STACK
5483 emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5484 0, VOIDmode, 1, tramp, Pmode);
5485 #endif
5486
5487 if (jmpofs >= 0)
5488 emit_insn (gen_imb ());
5489 }
5490 \f
5491 /* Determine where to put an argument to a function.
5492 Value is zero to push the argument on the stack,
5493 or a hard register in which to store the argument.
5494
5495 MODE is the argument's machine mode.
5496 TYPE is the data type of the argument (as a tree).
5497 This is null for libcalls where that information may
5498 not be available.
5499 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5500 the preceding args and about the function being called.
5501 NAMED is nonzero if this argument is a named parameter
5502 (otherwise it is an extra parameter matching an ellipsis).
5503
5504 On Alpha the first 6 words of args are normally in registers
5505 and the rest are pushed. */
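/* As an illustration (a made-up prototype): for
   extern double f (int a, double b, char *c) under the OSF ABI,
   A is passed in $16, B in $f17 and C in $18 -- argument slot N lands
   in integer register $(16+N) or FP register $f(16+N) according to its
   type, and the two register files advance in lockstep.  */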
5506
5507 rtx
5508 function_arg (CUMULATIVE_ARGS cum, enum machine_mode mode, tree type,
5509 int named ATTRIBUTE_UNUSED)
5510 {
5511 int basereg;
5512 int num_args;
5513
5514 /* Don't get confused and pass small structures in FP registers. */
5515 if (type && AGGREGATE_TYPE_P (type))
5516 basereg = 16;
5517 else
5518 {
5519 #ifdef ENABLE_CHECKING
5520 /* With alpha_split_complex_arg, we shouldn't see any raw complex
5521 values here. */
5522 gcc_assert (!COMPLEX_MODE_P (mode));
5523 #endif
5524
5525 /* Set up defaults for FP operands passed in FP registers, and
5526 integral operands passed in integer registers. */
5527 if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT)
5528 basereg = 32 + 16;
5529 else
5530 basereg = 16;
5531 }
5532
5533 /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5534 the three platforms, so we can't avoid conditional compilation. */
5535 #if TARGET_ABI_OPEN_VMS
5536 {
5537 if (mode == VOIDmode)
5538 return alpha_arg_info_reg_val (cum);
5539
5540 num_args = cum.num_args;
5541 if (num_args >= 6
5542 || targetm.calls.must_pass_in_stack (mode, type))
5543 return NULL_RTX;
5544 }
5545 #elif TARGET_ABI_UNICOSMK
5546 {
5547 int size;
5548
5549 /* If this is the last argument, generate the call info word (CIW). */
5550 /* ??? We don't include the caller's line number in the CIW because
5551 I don't know how to determine it if debug info is turned off. */
5552 if (mode == VOIDmode)
5553 {
5554 int i;
5555 HOST_WIDE_INT lo;
5556 HOST_WIDE_INT hi;
5557 rtx ciw;
5558
5559 lo = 0;
5560
5561 for (i = 0; i < cum.num_reg_words && i < 5; i++)
5562 if (cum.reg_args_type[i])
5563 lo |= (1 << (7 - i));
5564
5565 if (cum.num_reg_words == 6 && cum.reg_args_type[5])
5566 lo |= 7;
5567 else
5568 lo |= cum.num_reg_words;
5569
5570 #if HOST_BITS_PER_WIDE_INT == 32
5571 hi = (cum.num_args << 20) | cum.num_arg_words;
5572 #else
5573 lo = lo | ((HOST_WIDE_INT) cum.num_args << 52)
5574 | ((HOST_WIDE_INT) cum.num_arg_words << 32);
5575 hi = 0;
5576 #endif
5577 ciw = immed_double_const (lo, hi, DImode);
5578
5579 return gen_rtx_UNSPEC (DImode, gen_rtvec (1, ciw),
5580 UNSPEC_UMK_LOAD_CIW);
5581 }
5582
5583 size = ALPHA_ARG_SIZE (mode, type, named);
5584 num_args = cum.num_reg_words;
5585 if (cum.force_stack
5586 || cum.num_reg_words + size > 6
5587 || targetm.calls.must_pass_in_stack (mode, type))
5588 return NULL_RTX;
5589 else if (type && TYPE_MODE (type) == BLKmode)
5590 {
5591 rtx reg1, reg2;
5592
5593 reg1 = gen_rtx_REG (DImode, num_args + 16);
5594 reg1 = gen_rtx_EXPR_LIST (DImode, reg1, const0_rtx);
5595
5596 /* The argument fits in two registers. Note that we still need to
5597 reserve a register for empty structures. */
5598 if (size == 0)
5599 return NULL_RTX;
5600 else if (size == 1)
5601 return gen_rtx_PARALLEL (mode, gen_rtvec (1, reg1));
5602 else
5603 {
5604 reg2 = gen_rtx_REG (DImode, num_args + 17);
5605 reg2 = gen_rtx_EXPR_LIST (DImode, reg2, GEN_INT (8));
5606 return gen_rtx_PARALLEL (mode, gen_rtvec (2, reg1, reg2));
5607 }
5608 }
5609 }
5610 #elif TARGET_ABI_OSF
5611 {
5612 if (cum >= 6)
5613 return NULL_RTX;
5614 num_args = cum;
5615
5616 /* VOID is passed as a special flag for "last argument". */
5617 if (type == void_type_node)
5618 basereg = 16;
5619 else if (targetm.calls.must_pass_in_stack (mode, type))
5620 return NULL_RTX;
5621 }
5622 #else
5623 #error Unhandled ABI
5624 #endif
5625
5626 return gen_rtx_REG (mode, num_args + basereg);
5627 }
5628
5629 static int
5630 alpha_arg_partial_bytes (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5631 enum machine_mode mode ATTRIBUTE_UNUSED,
5632 tree type ATTRIBUTE_UNUSED,
5633 bool named ATTRIBUTE_UNUSED)
5634 {
5635 int words = 0;
5636
5637 #if TARGET_ABI_OPEN_VMS
5638 if (cum->num_args < 6
5639 && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named))
5640 words = 6 - cum->num_args;
5641 #elif TARGET_ABI_UNICOSMK
5642 /* Never any split arguments. */
5643 #elif TARGET_ABI_OSF
5644 if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named))
5645 words = 6 - *cum;
5646 #else
5647 #error Unhandled ABI
5648 #endif
5649
5650 return words * UNITS_PER_WORD;
5651 }
5652
5653
5654 /* Return true if TYPE must be returned in memory, instead of in registers. */
5655
5656 static bool
5657 alpha_return_in_memory (tree type, tree fndecl ATTRIBUTE_UNUSED)
5658 {
5659 enum machine_mode mode = VOIDmode;
5660 int size;
5661
5662 if (type)
5663 {
5664 mode = TYPE_MODE (type);
5665
5666 /* All aggregates are returned in memory. */
5667 if (AGGREGATE_TYPE_P (type))
5668 return true;
5669 }
5670
5671 size = GET_MODE_SIZE (mode);
5672 switch (GET_MODE_CLASS (mode))
5673 {
5674 case MODE_VECTOR_FLOAT:
5675 /* Pass all float vectors in memory, like an aggregate. */
5676 return true;
5677
5678 case MODE_COMPLEX_FLOAT:
5679 /* We judge complex floats on the size of their element,
5680 not the size of the whole type. */
5681 size = GET_MODE_UNIT_SIZE (mode);
5682 break;
5683
5684 case MODE_INT:
5685 case MODE_FLOAT:
5686 case MODE_COMPLEX_INT:
5687 case MODE_VECTOR_INT:
5688 break;
5689
5690 default:
5691 /* ??? We get called on all sorts of random stuff from
5692 aggregate_value_p. We must return something, but it's not
5693 clear what's safe to return. Pretend it's a struct I
5694 guess. */
5695 return true;
5696 }
5697
5698 /* Otherwise types must fit in one register. */
5699 return size > UNITS_PER_WORD;
5700 }
5701
5702 /* Return true if TYPE should be passed by invisible reference. */
5703
5704 static bool
5705 alpha_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5706 enum machine_mode mode,
5707 tree type ATTRIBUTE_UNUSED,
5708 bool named ATTRIBUTE_UNUSED)
5709 {
5710 return mode == TFmode || mode == TCmode;
5711 }
5712
5713 /* Define how to find the value returned by a function. VALTYPE is the
5714 data type of the value (as a tree). If the precise function being
5715 called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5716 MODE is set instead of VALTYPE for libcalls.
5717
5718 On Alpha the value is found in $0 for integer functions and
5719 $f0 for floating-point functions. */
5720
5721 rtx
5722 function_value (tree valtype, tree func ATTRIBUTE_UNUSED,
5723 enum machine_mode mode)
5724 {
5725 unsigned int regnum, dummy;
5726 enum mode_class class;
5727
5728 gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5729
5730 if (valtype)
5731 mode = TYPE_MODE (valtype);
5732
5733 class = GET_MODE_CLASS (mode);
5734 switch (class)
5735 {
5736 case MODE_INT:
5737 PROMOTE_MODE (mode, dummy, valtype);
5738 /* FALLTHRU */
5739
5740 case MODE_COMPLEX_INT:
5741 case MODE_VECTOR_INT:
5742 regnum = 0;
5743 break;
5744
5745 case MODE_FLOAT:
5746 regnum = 32;
5747 break;
5748
5749 case MODE_COMPLEX_FLOAT:
5750 {
5751 enum machine_mode cmode = GET_MODE_INNER (mode);
5752
5753 return gen_rtx_PARALLEL
5754 (VOIDmode,
5755 gen_rtvec (2,
5756 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5757 const0_rtx),
5758 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5759 GEN_INT (GET_MODE_SIZE (cmode)))));
5760 }
5761
5762 default:
5763 gcc_unreachable ();
5764 }
5765
5766 return gen_rtx_REG (mode, regnum);
5767 }
5768
5769 /* TCmode complex values are passed by invisible reference. We
5770 should not split these values. */
5771
5772 static bool
5773 alpha_split_complex_arg (tree type)
5774 {
5775 return TYPE_MODE (type) != TCmode;
5776 }
5777
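/* Build the va_list type used by the OSF/1 ABI.  The record laid out
   below behaves roughly like

     struct {
       void *__base;     -- start of the argument save area
       int __offset;     -- byte offset of the next argument within it
     };

   plus a dummy padding field; VMS and Unicos/Mk simply use a plain
   pointer instead.  */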
5778 static tree
5779 alpha_build_builtin_va_list (void)
5780 {
5781 tree base, ofs, space, record, type_decl;
5782
5783 if (TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK)
5784 return ptr_type_node;
5785
5786 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5787 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
5788 TREE_CHAIN (record) = type_decl;
5789 TYPE_NAME (record) = type_decl;
5790
5791 /* C++? SET_IS_AGGR_TYPE (record, 1); */
5792
5793 /* Dummy field to prevent alignment warnings. */
5794 space = build_decl (FIELD_DECL, NULL_TREE, integer_type_node);
5795 DECL_FIELD_CONTEXT (space) = record;
5796 DECL_ARTIFICIAL (space) = 1;
5797 DECL_IGNORED_P (space) = 1;
5798
5799 ofs = build_decl (FIELD_DECL, get_identifier ("__offset"),
5800 integer_type_node);
5801 DECL_FIELD_CONTEXT (ofs) = record;
5802 TREE_CHAIN (ofs) = space;
5803
5804 base = build_decl (FIELD_DECL, get_identifier ("__base"),
5805 ptr_type_node);
5806 DECL_FIELD_CONTEXT (base) = record;
5807 TREE_CHAIN (base) = ofs;
5808
5809 TYPE_FIELDS (record) = base;
5810 layout_type (record);
5811
5812 va_list_gpr_counter_field = ofs;
5813 return record;
5814 }
5815
5816 #if TARGET_ABI_OSF
5817 /* Helper function for alpha_stdarg_optimize_hook. Skip over casts
5818 and constant additions. */
5819
5820 static tree
5821 va_list_skip_additions (tree lhs)
5822 {
5823 tree rhs, stmt;
5824
5825 if (TREE_CODE (lhs) != SSA_NAME)
5826 return lhs;
5827
5828 for (;;)
5829 {
5830 stmt = SSA_NAME_DEF_STMT (lhs);
5831
5832 if (TREE_CODE (stmt) == PHI_NODE)
5833 return stmt;
5834
5835 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT
5836 || GIMPLE_STMT_OPERAND (stmt, 0) != lhs)
5837 return lhs;
5838
5839 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
5840 if (TREE_CODE (rhs) == WITH_SIZE_EXPR)
5841 rhs = TREE_OPERAND (rhs, 0);
5842
5843 if ((TREE_CODE (rhs) != NOP_EXPR
5844 && TREE_CODE (rhs) != CONVERT_EXPR
5845 && (TREE_CODE (rhs) != PLUS_EXPR
5846 || TREE_CODE (TREE_OPERAND (rhs, 1)) != INTEGER_CST
5847 || !host_integerp (TREE_OPERAND (rhs, 1), 1)))
5848 || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
5849 return rhs;
5850
5851 lhs = TREE_OPERAND (rhs, 0);
5852 }
5853 }
5854
5855 /* Check if LHS = RHS statement is
5856 LHS = *(ap.__base + ap.__offset + cst)
5857 or
5858 LHS = *(ap.__base
5859 + ((ap.__offset + cst <= 47)
5860 ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
5861 If the former, indicate that GPR registers are needed,
5862 if the latter, indicate that FPR registers are needed.
5863
5864 Also look for LHS = (*ptr).field, where ptr is one of the forms
5865 listed above.
5866
5867 On alpha, cfun->va_list_gpr_size is used as size of the needed
5868 regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
5869 registers are needed and bit 1 set if FPR registers are needed.
5870 Return true if va_list references should not be scanned for the
5871 current statement. */
5872
5873 static bool
5874 alpha_stdarg_optimize_hook (struct stdarg_info *si, tree lhs, tree rhs)
5875 {
5876 tree base, offset, arg1, arg2;
5877 int offset_arg = 1;
5878
5879 while (handled_component_p (rhs))
5880 rhs = TREE_OPERAND (rhs, 0);
5881 if (TREE_CODE (rhs) != INDIRECT_REF
5882 || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
5883 return false;
5884
5885 lhs = va_list_skip_additions (TREE_OPERAND (rhs, 0));
5886 if (lhs == NULL_TREE
5887 || TREE_CODE (lhs) != PLUS_EXPR)
5888 return false;
5889
5890 base = TREE_OPERAND (lhs, 0);
5891 if (TREE_CODE (base) == SSA_NAME)
5892 base = va_list_skip_additions (base);
5893
5894 if (TREE_CODE (base) != COMPONENT_REF
5895 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5896 {
5897 base = TREE_OPERAND (lhs, 0);
5898 if (TREE_CODE (base) == SSA_NAME)
5899 base = va_list_skip_additions (base);
5900
5901 if (TREE_CODE (base) != COMPONENT_REF
5902 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5903 return false;
5904
5905 offset_arg = 0;
5906 }
5907
5908 base = get_base_address (base);
5909 if (TREE_CODE (base) != VAR_DECL
5910 || !bitmap_bit_p (si->va_list_vars, DECL_UID (base)))
5911 return false;
5912
5913 offset = TREE_OPERAND (lhs, offset_arg);
5914 if (TREE_CODE (offset) == SSA_NAME)
5915 offset = va_list_skip_additions (offset);
5916
5917 if (TREE_CODE (offset) == PHI_NODE)
5918 {
5919 HOST_WIDE_INT sub;
5920
5921 if (PHI_NUM_ARGS (offset) != 2)
5922 goto escapes;
5923
5924 arg1 = va_list_skip_additions (PHI_ARG_DEF (offset, 0));
5925 arg2 = va_list_skip_additions (PHI_ARG_DEF (offset, 1));
5926 if (TREE_CODE (arg2) != MINUS_EXPR && TREE_CODE (arg2) != PLUS_EXPR)
5927 {
5928 tree tem = arg1;
5929 arg1 = arg2;
5930 arg2 = tem;
5931
5932 if (TREE_CODE (arg2) != MINUS_EXPR && TREE_CODE (arg2) != PLUS_EXPR)
5933 goto escapes;
5934 }
5935 if (!host_integerp (TREE_OPERAND (arg2, 1), 0))
5936 goto escapes;
5937
5938 sub = tree_low_cst (TREE_OPERAND (arg2, 1), 0);
5939 if (TREE_CODE (arg2) == MINUS_EXPR)
5940 sub = -sub;
5941 if (sub < -48 || sub > -32)
5942 goto escapes;
5943
5944 arg2 = va_list_skip_additions (TREE_OPERAND (arg2, 0));
5945 if (arg1 != arg2)
5946 goto escapes;
5947
5948 if (TREE_CODE (arg1) == SSA_NAME)
5949 arg1 = va_list_skip_additions (arg1);
5950
5951 if (TREE_CODE (arg1) != COMPONENT_REF
5952 || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
5953 || get_base_address (arg1) != base)
5954 goto escapes;
5955
5956 /* Need floating point regs. */
5957 cfun->va_list_fpr_size |= 2;
5958 }
5959 else if (TREE_CODE (offset) != COMPONENT_REF
5960 || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
5961 || get_base_address (offset) != base)
5962 goto escapes;
5963 else
5964 /* Need general regs. */
5965 cfun->va_list_fpr_size |= 1;
5966 return false;
5967
5968 escapes:
5969 si->va_list_escapes = true;
5970 return false;
5971 }
5972 #endif
5973
5974 /* Perform any actions needed for a function that is receiving a
5975 variable number of arguments. */
5976
5977 static void
5978 alpha_setup_incoming_varargs (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5979 tree type, int *pretend_size, int no_rtl)
5980 {
5981 CUMULATIVE_ARGS cum = *pcum;
5982
5983 /* Skip the current argument. */
5984 FUNCTION_ARG_ADVANCE (cum, mode, type, 1);
5985
5986 #if TARGET_ABI_UNICOSMK
5987 /* On Unicos/Mk, the standard subroutine __T3E_MISMATCH stores all register
5988 arguments on the stack. Unfortunately, it doesn't always store the first
5989 one (i.e. the one that arrives in $16 or $f16). This is not a problem
5990 with stdargs as we always have at least one named argument there. */
5991 if (cum.num_reg_words < 6)
5992 {
5993 if (!no_rtl)
5994 {
5995 emit_insn (gen_umk_mismatch_args (GEN_INT (cum.num_reg_words)));
5996 emit_insn (gen_arg_home_umk ());
5997 }
5998 *pretend_size = 0;
5999 }
6000 #elif TARGET_ABI_OPEN_VMS
6001 /* For VMS, we allocate space for all 6 arg registers plus a count.
6002
6003 However, if NO registers need to be saved, don't allocate any space.
6004 This is not only because we won't need the space, but because AP
6005 includes the current_pretend_args_size and we don't want to mess up
6006 any ap-relative addresses already made. */
6007 if (cum.num_args < 6)
6008 {
6009 if (!no_rtl)
6010 {
6011 emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6012 emit_insn (gen_arg_home ());
6013 }
6014 *pretend_size = 7 * UNITS_PER_WORD;
6015 }
6016 #else
6017 /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6018 only push those that are remaining. However, if NO registers need to
6019 be saved, don't allocate any space. This is not only because we won't
6020 need the space, but because AP includes the current_pretend_args_size
6021 and we don't want to mess up any ap-relative addresses already made.
6022
6023 If we are not to use the floating-point registers, save the integer
6024 registers where we would put the floating-point registers. This is
6025 not the most efficient way to implement varargs with just one register
6026 class, but it isn't worth doing anything more efficient in this rare
6027 case. */
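  /* Concretely, the stores below place the remaining FP argument registers
     at offsets [cum*8, 48) and the remaining integer argument registers at
     offsets [(cum+6)*8, 96) from virtual_incoming_args_rtx, which is why
     *pretend_size below is 12 words.  */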
6028 if (cum >= 6)
6029 return;
6030
6031 if (!no_rtl)
6032 {
6033 int count, set = get_varargs_alias_set ();
6034 rtx tmp;
6035
6036 count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6037 if (count > 6 - cum)
6038 count = 6 - cum;
6039
6040 /* Detect whether integer registers or floating-point registers
6041 are needed by the detected va_arg statements. See above for
6042 how these values are computed. Note that the "escape" value
6043 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
6044 these bits set. */
6045 gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6046
6047 if (cfun->va_list_fpr_size & 1)
6048 {
6049 tmp = gen_rtx_MEM (BLKmode,
6050 plus_constant (virtual_incoming_args_rtx,
6051 (cum + 6) * UNITS_PER_WORD));
6052 MEM_NOTRAP_P (tmp) = 1;
6053 set_mem_alias_set (tmp, set);
6054 move_block_from_reg (16 + cum, tmp, count);
6055 }
6056
6057 if (cfun->va_list_fpr_size & 2)
6058 {
6059 tmp = gen_rtx_MEM (BLKmode,
6060 plus_constant (virtual_incoming_args_rtx,
6061 cum * UNITS_PER_WORD));
6062 MEM_NOTRAP_P (tmp) = 1;
6063 set_mem_alias_set (tmp, set);
6064 move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6065 }
6066 }
6067 *pretend_size = 12 * UNITS_PER_WORD;
6068 #endif
6069 }
6070
6071 void
6072 alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6073 {
6074 HOST_WIDE_INT offset;
6075 tree t, offset_field, base_field;
6076
6077 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6078 return;
6079
6080 if (TARGET_ABI_UNICOSMK)
6081 std_expand_builtin_va_start (valist, nextarg);
6082
6083 /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6084 up by 48, storing fp arg registers in the first 48 bytes, and the
6085 integer arg registers in the next 48 bytes. This is only done,
6086 however, if any integer registers need to be stored.
6087
6088 If no integer registers need be stored, then we must subtract 48
6089 in order to account for the integer arg registers which are counted
6090 in argsize above, but which are not actually stored on the stack.
6091 Must further be careful here about structures straddling the last
6092 integer argument register; that futzes with pretend_args_size,
6093 which changes the meaning of AP. */
6094
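  /* For example, on OSF with a single named argument (NUM_ARGS == 1) this
     yields __base = AP + 48 and __offset = 8; va_arg of an integer type then
     reads *(AP + 56), where TARGET_SETUP_INCOMING_VARARGS stored $17, while
     va_arg of a double reads *(AP + 56 - 48) = *(AP + 8), where $f17 was
     stored.  */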
6095 if (NUM_ARGS < 6)
6096 offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6097 else
6098 offset = -6 * UNITS_PER_WORD + current_function_pretend_args_size;
6099
6100 if (TARGET_ABI_OPEN_VMS)
6101 {
6102 nextarg = plus_constant (nextarg, offset);
6103 nextarg = plus_constant (nextarg, NUM_ARGS * UNITS_PER_WORD);
6104 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (valist), valist,
6105 make_tree (ptr_type_node, nextarg));
6106 TREE_SIDE_EFFECTS (t) = 1;
6107
6108 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6109 }
6110 else
6111 {
6112 base_field = TYPE_FIELDS (TREE_TYPE (valist));
6113 offset_field = TREE_CHAIN (base_field);
6114
6115 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6116 valist, base_field, NULL_TREE);
6117 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6118 valist, offset_field, NULL_TREE);
6119
6120 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6121 t = build2 (PLUS_EXPR, ptr_type_node, t,
6122 build_int_cst (NULL_TREE, offset));
6123 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (base_field), base_field, t);
6124 TREE_SIDE_EFFECTS (t) = 1;
6125 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6126
6127 t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6128 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (offset_field),
6129 offset_field, t);
6130 TREE_SIDE_EFFECTS (t) = 1;
6131 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6132 }
6133 }
6134
6135 static tree
6136 alpha_gimplify_va_arg_1 (tree type, tree base, tree offset, tree *pre_p)
6137 {
6138 tree type_size, ptr_type, addend, t, addr, internal_post;
6139
6140 /* If the type could not be passed in registers, skip the block
6141 reserved for the registers. */
6142 if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
6143 {
6144 t = build_int_cst (TREE_TYPE (offset), 6*8);
6145 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (offset), offset,
6146 build2 (MAX_EXPR, TREE_TYPE (offset), offset, t));
6147 gimplify_and_add (t, pre_p);
6148 }
6149
6150 addend = offset;
6151 ptr_type = build_pointer_type (type);
6152
6153 if (TREE_CODE (type) == COMPLEX_TYPE)
6154 {
6155 tree real_part, imag_part, real_temp;
6156
6157 real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6158 offset, pre_p);
6159
6160 /* Copy the value into a new temporary, lest the formal temporary
6161 be reused out from under us. */
6162 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6163
6164 imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6165 offset, pre_p);
6166
6167 return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6168 }
6169 else if (TREE_CODE (type) == REAL_TYPE)
6170 {
6171 tree fpaddend, cond, fourtyeight;
6172
6173 fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6174 fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
6175 addend, fourtyeight);
6176 cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
6177 addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
6178 fpaddend, addend);
6179 }
6180
6181 /* Build the final address and force that value into a temporary. */
6182 addr = build2 (PLUS_EXPR, ptr_type, fold_convert (ptr_type, base),
6183 fold_convert (ptr_type, addend));
6184 internal_post = NULL;
6185 gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6186 append_to_statement_list (internal_post, pre_p);
6187
6188 /* Update the offset field. */
6189 type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6190 if (type_size == NULL || TREE_OVERFLOW (type_size))
6191 t = size_zero_node;
6192 else
6193 {
6194 t = size_binop (PLUS_EXPR, type_size, size_int (7));
6195 t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6196 t = size_binop (MULT_EXPR, t, size_int (8));
6197 }
6198 t = fold_convert (TREE_TYPE (offset), t);
6199 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, offset,
6200 build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t));
6201 gimplify_and_add (t, pre_p);
6202
6203 return build_va_arg_indirect_ref (addr);
6204 }
6205
6206 static tree
6207 alpha_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
6208 {
6209 tree offset_field, base_field, offset, base, t, r;
6210 bool indirect;
6211
6212 if (TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK)
6213 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6214
6215 base_field = TYPE_FIELDS (va_list_type_node);
6216 offset_field = TREE_CHAIN (base_field);
6217 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6218 valist, base_field, NULL_TREE);
6219 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6220 valist, offset_field, NULL_TREE);
6221
6222 /* Pull the fields of the structure out into temporaries. Since we never
6223 modify the base field, we can use a formal temporary. Sign-extend the
6224 offset field so that it's the proper width for pointer arithmetic. */
6225 base = get_formal_tmp_var (base_field, pre_p);
6226
6227 t = fold_convert (lang_hooks.types.type_for_size (64, 0), offset_field);
6228 offset = get_initialized_tmp_var (t, pre_p, NULL);
6229
6230 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6231 if (indirect)
6232 type = build_pointer_type (type);
6233
6234 /* Find the value. Note that this will be a stable indirection, or
6235 a composite of stable indirections in the case of complex. */
6236 r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6237
6238 /* Stuff the offset temporary back into its field. */
6239 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, offset_field,
6240 fold_convert (TREE_TYPE (offset_field), offset));
6241 gimplify_and_add (t, pre_p);
6242
6243 if (indirect)
6244 r = build_va_arg_indirect_ref (r);
6245
6246 return r;
6247 }
6248 \f
6249 /* Builtins. */
6250
6251 enum alpha_builtin
6252 {
6253 ALPHA_BUILTIN_CMPBGE,
6254 ALPHA_BUILTIN_EXTBL,
6255 ALPHA_BUILTIN_EXTWL,
6256 ALPHA_BUILTIN_EXTLL,
6257 ALPHA_BUILTIN_EXTQL,
6258 ALPHA_BUILTIN_EXTWH,
6259 ALPHA_BUILTIN_EXTLH,
6260 ALPHA_BUILTIN_EXTQH,
6261 ALPHA_BUILTIN_INSBL,
6262 ALPHA_BUILTIN_INSWL,
6263 ALPHA_BUILTIN_INSLL,
6264 ALPHA_BUILTIN_INSQL,
6265 ALPHA_BUILTIN_INSWH,
6266 ALPHA_BUILTIN_INSLH,
6267 ALPHA_BUILTIN_INSQH,
6268 ALPHA_BUILTIN_MSKBL,
6269 ALPHA_BUILTIN_MSKWL,
6270 ALPHA_BUILTIN_MSKLL,
6271 ALPHA_BUILTIN_MSKQL,
6272 ALPHA_BUILTIN_MSKWH,
6273 ALPHA_BUILTIN_MSKLH,
6274 ALPHA_BUILTIN_MSKQH,
6275 ALPHA_BUILTIN_UMULH,
6276 ALPHA_BUILTIN_ZAP,
6277 ALPHA_BUILTIN_ZAPNOT,
6278 ALPHA_BUILTIN_AMASK,
6279 ALPHA_BUILTIN_IMPLVER,
6280 ALPHA_BUILTIN_RPCC,
6281 ALPHA_BUILTIN_THREAD_POINTER,
6282 ALPHA_BUILTIN_SET_THREAD_POINTER,
6283
6284 /* TARGET_MAX */
6285 ALPHA_BUILTIN_MINUB8,
6286 ALPHA_BUILTIN_MINSB8,
6287 ALPHA_BUILTIN_MINUW4,
6288 ALPHA_BUILTIN_MINSW4,
6289 ALPHA_BUILTIN_MAXUB8,
6290 ALPHA_BUILTIN_MAXSB8,
6291 ALPHA_BUILTIN_MAXUW4,
6292 ALPHA_BUILTIN_MAXSW4,
6293 ALPHA_BUILTIN_PERR,
6294 ALPHA_BUILTIN_PKLB,
6295 ALPHA_BUILTIN_PKWB,
6296 ALPHA_BUILTIN_UNPKBL,
6297 ALPHA_BUILTIN_UNPKBW,
6298
6299 /* TARGET_CIX */
6300 ALPHA_BUILTIN_CTTZ,
6301 ALPHA_BUILTIN_CTLZ,
6302 ALPHA_BUILTIN_CTPOP,
6303
6304 ALPHA_BUILTIN_max
6305 };
6306
6307 static unsigned int const code_for_builtin[ALPHA_BUILTIN_max] = {
6308 CODE_FOR_builtin_cmpbge,
6309 CODE_FOR_builtin_extbl,
6310 CODE_FOR_builtin_extwl,
6311 CODE_FOR_builtin_extll,
6312 CODE_FOR_builtin_extql,
6313 CODE_FOR_builtin_extwh,
6314 CODE_FOR_builtin_extlh,
6315 CODE_FOR_builtin_extqh,
6316 CODE_FOR_builtin_insbl,
6317 CODE_FOR_builtin_inswl,
6318 CODE_FOR_builtin_insll,
6319 CODE_FOR_builtin_insql,
6320 CODE_FOR_builtin_inswh,
6321 CODE_FOR_builtin_inslh,
6322 CODE_FOR_builtin_insqh,
6323 CODE_FOR_builtin_mskbl,
6324 CODE_FOR_builtin_mskwl,
6325 CODE_FOR_builtin_mskll,
6326 CODE_FOR_builtin_mskql,
6327 CODE_FOR_builtin_mskwh,
6328 CODE_FOR_builtin_msklh,
6329 CODE_FOR_builtin_mskqh,
6330 CODE_FOR_umuldi3_highpart,
6331 CODE_FOR_builtin_zap,
6332 CODE_FOR_builtin_zapnot,
6333 CODE_FOR_builtin_amask,
6334 CODE_FOR_builtin_implver,
6335 CODE_FOR_builtin_rpcc,
6336 CODE_FOR_load_tp,
6337 CODE_FOR_set_tp,
6338
6339 /* TARGET_MAX */
6340 CODE_FOR_builtin_minub8,
6341 CODE_FOR_builtin_minsb8,
6342 CODE_FOR_builtin_minuw4,
6343 CODE_FOR_builtin_minsw4,
6344 CODE_FOR_builtin_maxub8,
6345 CODE_FOR_builtin_maxsb8,
6346 CODE_FOR_builtin_maxuw4,
6347 CODE_FOR_builtin_maxsw4,
6348 CODE_FOR_builtin_perr,
6349 CODE_FOR_builtin_pklb,
6350 CODE_FOR_builtin_pkwb,
6351 CODE_FOR_builtin_unpkbl,
6352 CODE_FOR_builtin_unpkbw,
6353
6354 /* TARGET_CIX */
6355 CODE_FOR_ctzdi2,
6356 CODE_FOR_clzdi2,
6357 CODE_FOR_popcountdi2
6358 };
6359
6360 struct alpha_builtin_def
6361 {
6362 const char *name;
6363 enum alpha_builtin code;
6364 unsigned int target_mask;
6365 bool is_const;
6366 };
6367
6368 static struct alpha_builtin_def const zero_arg_builtins[] = {
6369 { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true },
6370 { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false }
6371 };
6372
6373 static struct alpha_builtin_def const one_arg_builtins[] = {
6374 { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true },
6375 { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true },
6376 { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true },
6377 { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true },
6378 { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true },
6379 { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true },
6380 { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true },
6381 { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true }
6382 };
6383
6384 static struct alpha_builtin_def const two_arg_builtins[] = {
6385 { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true },
6386 { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true },
6387 { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true },
6388 { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true },
6389 { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true },
6390 { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true },
6391 { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true },
6392 { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true },
6393 { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true },
6394 { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true },
6395 { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true },
6396 { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true },
6397 { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true },
6398 { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true },
6399 { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true },
6400 { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true },
6401 { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true },
6402 { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true },
6403 { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true },
6404 { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true },
6405 { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true },
6406 { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true },
6407 { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true },
6408 { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true },
6409 { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true },
6410 { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true },
6411 { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true },
6412 { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true },
6413 { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true },
6414 { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true },
6415 { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true },
6416 { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true },
6417 { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true },
6418 { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true }
6419 };
6420
6421 static GTY(()) tree alpha_v8qi_u;
6422 static GTY(()) tree alpha_v8qi_s;
6423 static GTY(()) tree alpha_v4hi_u;
6424 static GTY(()) tree alpha_v4hi_s;
6425
6426 /* Helper function of alpha_init_builtins. Add the COUNT built-in
6427 functions pointed to by P, with function type FTYPE. */
6428
6429 static void
6430 alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
6431 tree ftype)
6432 {
6433 tree decl;
6434 size_t i;
6435
6436 for (i = 0; i < count; ++i, ++p)
6437 if ((target_flags & p->target_mask) == p->target_mask)
6438 {
6439 decl = add_builtin_function (p->name, ftype, p->code, BUILT_IN_MD,
6440 NULL, NULL);
6441 if (p->is_const)
6442 TREE_READONLY (decl) = 1;
6443 TREE_NOTHROW (decl) = 1;
6444 }
6445 }
6446
6447
6448 static void
6449 alpha_init_builtins (void)
6450 {
6451 tree dimode_integer_type_node;
6452 tree ftype, decl;
6453
6454 dimode_integer_type_node = lang_hooks.types.type_for_mode (DImode, 0);
6455
6456 ftype = build_function_type (dimode_integer_type_node, void_list_node);
6457 alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins),
6458 ftype);
6459
6460 ftype = build_function_type_list (dimode_integer_type_node,
6461 dimode_integer_type_node, NULL_TREE);
6462 alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins),
6463 ftype);
6464
6465 ftype = build_function_type_list (dimode_integer_type_node,
6466 dimode_integer_type_node,
6467 dimode_integer_type_node, NULL_TREE);
6468 alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins),
6469 ftype);
6470
6471 ftype = build_function_type (ptr_type_node, void_list_node);
6472 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
6473 ALPHA_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
6474 NULL, NULL);
6475 TREE_NOTHROW (decl) = 1;
6476
6477 ftype = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
6478 decl = add_builtin_function ("__builtin_set_thread_pointer", ftype,
6479 ALPHA_BUILTIN_SET_THREAD_POINTER, BUILT_IN_MD,
6480 NULL, NULL);
6481 TREE_NOTHROW (decl) = 1;
6482
6483 alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6484 alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6485 alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6486 alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6487 }
6488
6489 /* Expand an expression EXP that calls a built-in function,
6490 with result going to TARGET if that's convenient
6491 (and in mode MODE if that's convenient).
6492 SUBTARGET may be used as the target for computing one of EXP's operands.
6493 IGNORE is nonzero if the value is to be ignored. */
6494
6495 static rtx
6496 alpha_expand_builtin (tree exp, rtx target,
6497 rtx subtarget ATTRIBUTE_UNUSED,
6498 enum machine_mode mode ATTRIBUTE_UNUSED,
6499 int ignore ATTRIBUTE_UNUSED)
6500 {
6501 #define MAX_ARGS 2
6502
6503 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6504 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6505 tree arg;
6506 call_expr_arg_iterator iter;
6507 enum insn_code icode;
6508 rtx op[MAX_ARGS], pat;
6509 int arity;
6510 bool nonvoid;
6511
6512 if (fcode >= ALPHA_BUILTIN_max)
6513 internal_error ("bad builtin fcode");
6514 icode = code_for_builtin[fcode];
6515 if (icode == 0)
6516 internal_error ("bad builtin fcode");
6517
6518 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6519
6520 arity = 0;
6521 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
6522 {
6523 const struct insn_operand_data *insn_op;
6524
6525 if (arg == error_mark_node)
6526 return NULL_RTX;
6527 if (arity >= MAX_ARGS)
6528 return NULL_RTX;
6529
6530 insn_op = &insn_data[icode].operand[arity + nonvoid];
6531
6532 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, 0);
6533
6534 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6535 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6536 arity++;
6537 }
6538
6539 if (nonvoid)
6540 {
6541 enum machine_mode tmode = insn_data[icode].operand[0].mode;
6542 if (!target
6543 || GET_MODE (target) != tmode
6544 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6545 target = gen_reg_rtx (tmode);
6546 }
6547
6548 switch (arity)
6549 {
6550 case 0:
6551 pat = GEN_FCN (icode) (target);
6552 break;
6553 case 1:
6554 if (nonvoid)
6555 pat = GEN_FCN (icode) (target, op[0]);
6556 else
6557 pat = GEN_FCN (icode) (op[0]);
6558 break;
6559 case 2:
6560 pat = GEN_FCN (icode) (target, op[0], op[1]);
6561 break;
6562 default:
6563 gcc_unreachable ();
6564 }
6565 if (!pat)
6566 return NULL_RTX;
6567 emit_insn (pat);
6568
6569 if (nonvoid)
6570 return target;
6571 else
6572 return const0_rtx;
6573 }
6574
6575
6576 /* Several bits below assume HWI >= 64 bits. This should be enforced
6577 by config.gcc. */
6578 #if HOST_BITS_PER_WIDE_INT < 64
6579 # error "HOST_WIDE_INT too small"
6580 #endif
6581
6582 /* Fold the builtin for the CMPBGE instruction. This is a vector comparison
6583 with an 8-bit output vector. OPINT contains the integer operands; bit N
6584 of OP_CONST is set if OPINT[N] is valid. */
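/* For example, __builtin_alpha_cmpbge (0x0102030405060708, 0x0807060504030201)
   folds to 0x0f: only the four low byte lanes of the first operand compare
   >= the corresponding lanes of the second.  Since the comparison is
   unsigned, cmpbge x, 0 is always 0xff, which the op_const == 2 case below
   exploits.  */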
6585
6586 static tree
6587 alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6588 {
6589 if (op_const == 3)
6590 {
6591 int i, val;
6592 for (i = 0, val = 0; i < 8; ++i)
6593 {
6594 unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6595 unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6596 if (c0 >= c1)
6597 val |= 1 << i;
6598 }
6599 return build_int_cst (long_integer_type_node, val);
6600 }
6601 else if (op_const == 2 && opint[1] == 0)
6602 return build_int_cst (long_integer_type_node, 0xff);
6603 return NULL;
6604 }
6605
6606 /* Fold the builtin for the ZAPNOT instruction. This is essentially a
6607 specialized form of an AND operation. Other byte manipulation instructions
6608 are defined in terms of this instruction, so this is also used as a
6609 subroutine for other builtins.
6610
6611 OP contains the tree operands; OPINT contains the extracted integer values.
6612 Bit N of OP_CONST is set if OPINT[N] is valid. OP may be null, in which
6613 case only OPINT is considered. */
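/* For example, __builtin_alpha_zapnot (x, 0x0f) folds to x & 0xffffffff,
   keeping the four low byte lanes, while __builtin_alpha_zap (x, 0x0f),
   handled later by complementing the selector, keeps the four high lanes
   instead.  */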
6614
6615 static tree
6616 alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6617 long op_const)
6618 {
6619 if (op_const & 2)
6620 {
6621 unsigned HOST_WIDE_INT mask = 0;
6622 int i;
6623
6624 for (i = 0; i < 8; ++i)
6625 if ((opint[1] >> i) & 1)
6626 mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6627
6628 if (op_const & 1)
6629 return build_int_cst (long_integer_type_node, opint[0] & mask);
6630
6631 if (op)
6632 return fold_build2 (BIT_AND_EXPR, long_integer_type_node, op[0],
6633 build_int_cst (long_integer_type_node, mask));
6634 }
6635 else if ((op_const & 1) && opint[0] == 0)
6636 return build_int_cst (long_integer_type_node, 0);
6637 return NULL;
6638 }
6639
6640 /* Fold the builtins for the EXT family of instructions. */
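/* The fold below is a right shift followed by a zapnot; e.g.
   __builtin_alpha_extbl (0x12345678, 3) folds to 0x12, and
   __builtin_alpha_extll (x, 0) folds to x & 0xffffffff even when x is
   not constant.  */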
6641
6642 static tree
6643 alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6644 long op_const, unsigned HOST_WIDE_INT bytemask,
6645 bool is_high)
6646 {
6647 long zap_const = 2;
6648 tree *zap_op = NULL;
6649
6650 if (op_const & 2)
6651 {
6652 unsigned HOST_WIDE_INT loc;
6653
6654 loc = opint[1] & 7;
6655 if (BYTES_BIG_ENDIAN)
6656 loc ^= 7;
6657 loc *= 8;
6658
6659 if (loc != 0)
6660 {
6661 if (op_const & 1)
6662 {
6663 unsigned HOST_WIDE_INT temp = opint[0];
6664 if (is_high)
6665 temp <<= loc;
6666 else
6667 temp >>= loc;
6668 opint[0] = temp;
6669 zap_const = 3;
6670 }
6671 }
6672 else
6673 zap_op = op;
6674 }
6675
6676 opint[1] = bytemask;
6677 return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
6678 }
6679
6680 /* Fold the builtins for the INS family of instructions. */
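/* These mirror the EXT folds with a left shift; e.g.
   __builtin_alpha_insbl (0xab, 3) folds to 0xab000000.  */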
6681
6682 static tree
6683 alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6684 long op_const, unsigned HOST_WIDE_INT bytemask,
6685 bool is_high)
6686 {
6687 if ((op_const & 1) && opint[0] == 0)
6688 return build_int_cst (long_integer_type_node, 0);
6689
6690 if (op_const & 2)
6691 {
6692 unsigned HOST_WIDE_INT temp, loc, byteloc;
6693 tree *zap_op = NULL;
6694
6695 loc = opint[1] & 7;
6696 if (BYTES_BIG_ENDIAN)
6697 loc ^= 7;
6698 bytemask <<= loc;
6699
6700 temp = opint[0];
6701 if (is_high)
6702 {
6703 byteloc = (64 - (loc * 8)) & 0x3f;
6704 if (byteloc == 0)
6705 zap_op = op;
6706 else
6707 temp >>= byteloc;
6708 bytemask >>= 8;
6709 }
6710 else
6711 {
6712 byteloc = loc * 8;
6713 if (byteloc == 0)
6714 zap_op = op;
6715 else
6716 temp <<= byteloc;
6717 }
6718
6719 opint[0] = temp;
6720 opint[1] = bytemask;
6721 return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6722 }
6723
6724 return NULL;
6725 }
6726
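/* Fold the builtins for the MSK family of instructions.  These clear the
   byte lanes that the corresponding INS instruction would write, expressed
   below as a zapnot with the complemented byte selector; e.g.
   __builtin_alpha_mskbl (x, 3) folds to x & 0xffffffff00ffffff.  */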
6727 static tree
6728 alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6729 long op_const, unsigned HOST_WIDE_INT bytemask,
6730 bool is_high)
6731 {
6732 if (op_const & 2)
6733 {
6734 unsigned HOST_WIDE_INT loc;
6735
6736 loc = opint[1] & 7;
6737 if (BYTES_BIG_ENDIAN)
6738 loc ^= 7;
6739 bytemask <<= loc;
6740
6741 if (is_high)
6742 bytemask >>= 8;
6743
6744 opint[1] = bytemask ^ 0xff;
6745 }
6746
6747 return alpha_fold_builtin_zapnot (op, opint, op_const);
6748 }
6749
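/* Fold the builtin for the UMULH instruction, which yields the high 64 bits
   of the unsigned 128-bit product of its operands; e.g.
   __builtin_alpha_umulh (1UL << 63, 4) folds to 2.  */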
6750 static tree
6751 alpha_fold_builtin_umulh (unsigned HOST_WIDE_INT opint[], long op_const)
6752 {
6753 switch (op_const)
6754 {
6755 case 3:
6756 {
6757 unsigned HOST_WIDE_INT l;
6758 HOST_WIDE_INT h;
6759
6760 mul_double (opint[0], 0, opint[1], 0, &l, &h);
6761
6762 #if HOST_BITS_PER_WIDE_INT > 64
6763 # error fixme
6764 #endif
6765
6766 return build_int_cst (long_integer_type_node, h);
6767 }
6768
6769 case 1:
6770 opint[1] = opint[0];
6771 /* FALLTHRU */
6772 case 2:
6773 /* Note that (X*1) >> 64 == 0. */
6774 if (opint[1] == 0 || opint[1] == 1)
6775 return build_int_cst (long_integer_type_node, 0);
6776 break;
6777 }
6778 return NULL;
6779 }
6780
6781 static tree
6782 alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6783 {
6784 tree op0 = fold_convert (vtype, op[0]);
6785 tree op1 = fold_convert (vtype, op[1]);
6786 tree val = fold_build2 (code, vtype, op0, op1);
6787 return fold_convert (long_integer_type_node, val);
6788 }
6789
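/* Fold the builtin for the PERR instruction, which sums the absolute
   differences of the eight byte lanes of its two operands.  */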
6790 static tree
6791 alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6792 {
6793 unsigned HOST_WIDE_INT temp = 0;
6794 int i;
6795
6796 if (op_const != 3)
6797 return NULL;
6798
6799 for (i = 0; i < 8; ++i)
6800 {
6801 unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6802 unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6803 if (a >= b)
6804 temp += a - b;
6805 else
6806 temp += b - a;
6807 }
6808
6809 return build_int_cst (long_integer_type_node, temp);
6810 }
6811
6812 static tree
6813 alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6814 {
6815 unsigned HOST_WIDE_INT temp;
6816
6817 if (op_const == 0)
6818 return NULL;
6819
6820 temp = opint[0] & 0xff;
6821 temp |= (opint[0] >> 24) & 0xff00;
6822
6823 return build_int_cst (long_integer_type_node, temp);
6824 }
6825
6826 static tree
6827 alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
6828 {
6829 unsigned HOST_WIDE_INT temp;
6830
6831 if (op_const == 0)
6832 return NULL;
6833
6834 temp = opint[0] & 0xff;
6835 temp |= (opint[0] >> 8) & 0xff00;
6836 temp |= (opint[0] >> 16) & 0xff0000;
6837 temp |= (opint[0] >> 24) & 0xff000000;
6838
6839 return build_int_cst (long_integer_type_node, temp);
6840 }
6841
6842 static tree
6843 alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
6844 {
6845 unsigned HOST_WIDE_INT temp;
6846
6847 if (op_const == 0)
6848 return NULL;
6849
6850 temp = opint[0] & 0xff;
6851 temp |= (opint[0] & 0xff00) << 24;
6852
6853 return build_int_cst (long_integer_type_node, temp);
6854 }
6855
6856 static tree
6857 alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
6858 {
6859 unsigned HOST_WIDE_INT temp;
6860
6861 if (op_const == 0)
6862 return NULL;
6863
6864 temp = opint[0] & 0xff;
6865 temp |= (opint[0] & 0x0000ff00) << 8;
6866 temp |= (opint[0] & 0x00ff0000) << 16;
6867 temp |= (opint[0] & 0xff000000) << 24;
6868
6869 return build_int_cst (long_integer_type_node, temp);
6870 }
6871
6872 static tree
6873 alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
6874 {
6875 unsigned HOST_WIDE_INT temp;
6876
6877 if (op_const == 0)
6878 return NULL;
6879
6880 if (opint[0] == 0)
6881 temp = 64;
6882 else
6883 temp = exact_log2 (opint[0] & -opint[0]);
6884
6885 return build_int_cst (long_integer_type_node, temp);
6886 }
6887
6888 static tree
6889 alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
6890 {
6891 unsigned HOST_WIDE_INT temp;
6892
6893 if (op_const == 0)
6894 return NULL;
6895
6896 if (opint[0] == 0)
6897 temp = 64;
6898 else
6899 temp = 64 - floor_log2 (opint[0]) - 1;
6900
6901 return build_int_cst (long_integer_type_node, temp);
6902 }
6903
6904 static tree
6905 alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
6906 {
6907 unsigned HOST_WIDE_INT temp, op;
6908
6909 if (op_const == 0)
6910 return NULL;
6911
6912 op = opint[0];
6913 temp = 0;
6914 while (op)
6915 temp++, op &= op - 1;
6916
6917 return build_int_cst (long_integer_type_node, temp);
6918 }
6919
6920 /* Fold one of our builtin functions. */
6921
6922 static tree
6923 alpha_fold_builtin (tree fndecl, tree arglist, bool ignore ATTRIBUTE_UNUSED)
6924 {
6925 tree op[MAX_ARGS], t;
6926 unsigned HOST_WIDE_INT opint[MAX_ARGS];
6927 long op_const = 0, arity = 0;
6928
6929 for (t = arglist; t ; t = TREE_CHAIN (t), ++arity)
6930 {
6931 tree arg = TREE_VALUE (t);
6932 if (arg == error_mark_node)
6933 return NULL;
6934 if (arity >= MAX_ARGS)
6935 return NULL;
6936
6937 op[arity] = arg;
6938 opint[arity] = 0;
6939 if (TREE_CODE (arg) == INTEGER_CST)
6940 {
6941 op_const |= 1L << arity;
6942 opint[arity] = int_cst_value (arg);
6943 }
6944 }
6945
6946 switch (DECL_FUNCTION_CODE (fndecl))
6947 {
6948 case ALPHA_BUILTIN_CMPBGE:
6949 return alpha_fold_builtin_cmpbge (opint, op_const);
6950
6951 case ALPHA_BUILTIN_EXTBL:
6952 return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
6953 case ALPHA_BUILTIN_EXTWL:
6954 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
6955 case ALPHA_BUILTIN_EXTLL:
6956 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
6957 case ALPHA_BUILTIN_EXTQL:
6958 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
6959 case ALPHA_BUILTIN_EXTWH:
6960 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
6961 case ALPHA_BUILTIN_EXTLH:
6962 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
6963 case ALPHA_BUILTIN_EXTQH:
6964 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
6965
6966 case ALPHA_BUILTIN_INSBL:
6967 return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
6968 case ALPHA_BUILTIN_INSWL:
6969 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
6970 case ALPHA_BUILTIN_INSLL:
6971 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
6972 case ALPHA_BUILTIN_INSQL:
6973 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
6974 case ALPHA_BUILTIN_INSWH:
6975 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
6976 case ALPHA_BUILTIN_INSLH:
6977 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
6978 case ALPHA_BUILTIN_INSQH:
6979 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
6980
6981 case ALPHA_BUILTIN_MSKBL:
6982 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
6983 case ALPHA_BUILTIN_MSKWL:
6984 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
6985 case ALPHA_BUILTIN_MSKLL:
6986 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
6987 case ALPHA_BUILTIN_MSKQL:
6988 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
6989 case ALPHA_BUILTIN_MSKWH:
6990 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
6991 case ALPHA_BUILTIN_MSKLH:
6992 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
6993 case ALPHA_BUILTIN_MSKQH:
6994 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
6995
6996 case ALPHA_BUILTIN_UMULH:
6997 return alpha_fold_builtin_umulh (opint, op_const);
6998
6999 case ALPHA_BUILTIN_ZAP:
7000 opint[1] ^= 0xff;
7001 /* FALLTHRU */
7002 case ALPHA_BUILTIN_ZAPNOT:
7003 return alpha_fold_builtin_zapnot (op, opint, op_const);
7004
7005 case ALPHA_BUILTIN_MINUB8:
7006 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7007 case ALPHA_BUILTIN_MINSB8:
7008 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7009 case ALPHA_BUILTIN_MINUW4:
7010 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7011 case ALPHA_BUILTIN_MINSW4:
7012 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7013 case ALPHA_BUILTIN_MAXUB8:
7014 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7015 case ALPHA_BUILTIN_MAXSB8:
7016 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7017 case ALPHA_BUILTIN_MAXUW4:
7018 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7019 case ALPHA_BUILTIN_MAXSW4:
7020 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7021
7022 case ALPHA_BUILTIN_PERR:
7023 return alpha_fold_builtin_perr (opint, op_const);
7024 case ALPHA_BUILTIN_PKLB:
7025 return alpha_fold_builtin_pklb (opint, op_const);
7026 case ALPHA_BUILTIN_PKWB:
7027 return alpha_fold_builtin_pkwb (opint, op_const);
7028 case ALPHA_BUILTIN_UNPKBL:
7029 return alpha_fold_builtin_unpkbl (opint, op_const);
7030 case ALPHA_BUILTIN_UNPKBW:
7031 return alpha_fold_builtin_unpkbw (opint, op_const);
7032
7033 case ALPHA_BUILTIN_CTTZ:
7034 return alpha_fold_builtin_cttz (opint, op_const);
7035 case ALPHA_BUILTIN_CTLZ:
7036 return alpha_fold_builtin_ctlz (opint, op_const);
7037 case ALPHA_BUILTIN_CTPOP:
7038 return alpha_fold_builtin_ctpop (opint, op_const);
7039
7040 case ALPHA_BUILTIN_AMASK:
7041 case ALPHA_BUILTIN_IMPLVER:
7042 case ALPHA_BUILTIN_RPCC:
7043 case ALPHA_BUILTIN_THREAD_POINTER:
7044 case ALPHA_BUILTIN_SET_THREAD_POINTER:
7045 /* None of these are foldable at compile-time. */
7046 default:
7047 return NULL;
7048 }
7049 }
7050 \f
7051 /* This page contains routines that are used to determine what the function
7052 prologue and epilogue code will do and write them out. */
7053
7054 /* Compute the size of the save area in the stack. */
7055
7056 /* These variables are used for communication between the following functions.
7057 They indicate various things about the current function being compiled
7058 that are used to tell what kind of prologue, epilogue and procedure
7059 descriptor to generate. */
7060
7061 /* The kind of procedure (null, register or stack frame) we need to generate. */
7062 enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7063 static enum alpha_procedure_types alpha_procedure_type;
7064
7065 /* Register number (either FP or SP) that is used to unwind the frame. */
7066 static int vms_unwind_regno;
7067
7068 /* Register number used to save FP. We need not have one for RA since
7069 we don't modify it for register procedures. This is only defined
7070 for register frame procedures. */
7071 static int vms_save_fp_regno;
7072
7073 /* Register number used to reference objects off our PV. */
7074 static int vms_base_regno;
7075
7076 /* Compute register masks for saved registers. */
7077
7078 static void
7079 alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP)
7080 {
7081 unsigned long imask = 0;
7082 unsigned long fmask = 0;
7083 unsigned int i;
7084
7085 /* When outputting a thunk, we don't have valid register life info,
7086 but assemble_start_function wants to output .frame and .mask
7087 directives. */
7088 if (current_function_is_thunk)
7089 {
7090 *imaskP = 0;
7091 *fmaskP = 0;
7092 return;
7093 }
7094
7095 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7096 imask |= (1UL << HARD_FRAME_POINTER_REGNUM);
7097
7098 /* One for every register we have to save. */
7099 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7100 if (! fixed_regs[i] && ! call_used_regs[i]
7101 && regs_ever_live[i] && i != REG_RA
7102 && (!TARGET_ABI_UNICOSMK || i != HARD_FRAME_POINTER_REGNUM))
7103 {
7104 if (i < 32)
7105 imask |= (1UL << i);
7106 else
7107 fmask |= (1UL << (i - 32));
7108 }
7109
7110 /* We need to restore these for the handler. */
7111 if (current_function_calls_eh_return)
7112 {
7113 for (i = 0; ; ++i)
7114 {
7115 unsigned regno = EH_RETURN_DATA_REGNO (i);
7116 if (regno == INVALID_REGNUM)
7117 break;
7118 imask |= 1UL << regno;
7119 }
7120 }
7121
7122 /* If any register spilled, then spill the return address also. */
7123 /* ??? This is required by the Digital stack unwind specification
7124 and isn't needed if we're doing Dwarf2 unwinding. */
7125 if (imask || fmask || alpha_ra_ever_killed ())
7126 imask |= (1UL << REG_RA);
7127
7128 *imaskP = imask;
7129 *fmaskP = fmask;
7130 }
7131
7132 int
7133 alpha_sa_size (void)
7134 {
7135 unsigned long mask[2];
7136 int sa_size = 0;
7137 int i, j;
7138
7139 alpha_sa_mask (&mask[0], &mask[1]);
7140
7141 if (TARGET_ABI_UNICOSMK)
7142 {
7143 if (mask[0] || mask[1])
7144 sa_size = 14;
7145 }
7146 else
7147 {
7148 for (j = 0; j < 2; ++j)
7149 for (i = 0; i < 32; ++i)
7150 if ((mask[j] >> i) & 1)
7151 sa_size++;
7152 }
7153
7154 if (TARGET_ABI_UNICOSMK)
7155 {
7156 /* We might not need to generate a frame if we don't make any calls
7157 (including calls to __T3E_MISMATCH if this is a vararg function),
7158 don't have any local variables which require stack slots, don't
7159 use alloca and have not determined that we need a frame for other
7160 reasons. */
7161
7162 alpha_procedure_type
7163 = (sa_size || get_frame_size() != 0
7164 || current_function_outgoing_args_size
7165 || current_function_stdarg || current_function_calls_alloca
7166 || frame_pointer_needed)
7167 ? PT_STACK : PT_REGISTER;
7168
7169 /* Always reserve space for saving callee-saved registers if we
7170 need a frame as required by the calling convention. */
7171 if (alpha_procedure_type == PT_STACK)
7172 sa_size = 14;
7173 }
7174 else if (TARGET_ABI_OPEN_VMS)
7175 {
7176 /* Start by assuming we can use a register procedure if we don't
7177 make any calls (REG_RA not used) or need to save any
7178 registers and a stack procedure if we do. */
7179 if ((mask[0] >> REG_RA) & 1)
7180 alpha_procedure_type = PT_STACK;
7181 else if (get_frame_size() != 0)
7182 alpha_procedure_type = PT_REGISTER;
7183 else
7184 alpha_procedure_type = PT_NULL;
7185
7186 /* Don't reserve space for saving FP & RA yet. Do that later after we've
7187 made the final decision on stack procedure vs register procedure. */
7188 if (alpha_procedure_type == PT_STACK)
7189 sa_size -= 2;
7190
7191 /* Decide whether to refer to objects off our PV via FP or PV.
7192 If we need FP for something else or if we receive a nonlocal
7193 goto (which expects PV to contain the value), we must use PV.
7194 Otherwise, start by assuming we can use FP. */
7195
7196 vms_base_regno
7197 = (frame_pointer_needed
7198 || current_function_has_nonlocal_label
7199 || alpha_procedure_type == PT_STACK
7200 || current_function_outgoing_args_size)
7201 ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7202
7203 /* If we want to copy PV into FP, we need to find some register
7204 in which to save FP. */
7205
7206 vms_save_fp_regno = -1;
7207 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7208 for (i = 0; i < 32; i++)
7209 if (! fixed_regs[i] && call_used_regs[i] && ! regs_ever_live[i])
7210 vms_save_fp_regno = i;
7211
7212 if (vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7213 vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7214 else if (alpha_procedure_type == PT_NULL)
7215 vms_base_regno = REG_PV;
7216
7217 /* Stack unwinding should be done via FP unless we use it for PV. */
7218 vms_unwind_regno = (vms_base_regno == REG_PV
7219 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7220
7221 /* If this is a stack procedure, allow space for saving FP and RA. */
7222 if (alpha_procedure_type == PT_STACK)
7223 sa_size += 2;
7224 }
7225 else
7226 {
7227 /* Our size must be even (multiple of 16 bytes). */
7228 if (sa_size & 1)
7229 sa_size++;
7230 }
7231
7232 return sa_size * 8;
7233 }
7234
7235 /* Define the offset between two registers, one to be eliminated,
7236 and the other its replacement, at the start of a routine. */
7237
7238 HOST_WIDE_INT
7239 alpha_initial_elimination_offset (unsigned int from,
7240 unsigned int to ATTRIBUTE_UNUSED)
7241 {
7242 HOST_WIDE_INT ret;
7243
7244 ret = alpha_sa_size ();
7245 ret += ALPHA_ROUND (current_function_outgoing_args_size);
7246
7247 switch (from)
7248 {
7249 case FRAME_POINTER_REGNUM:
7250 break;
7251
7252 case ARG_POINTER_REGNUM:
7253 ret += (ALPHA_ROUND (get_frame_size ()
7254 + current_function_pretend_args_size)
7255 - current_function_pretend_args_size);
7256 break;
7257
7258 default:
7259 gcc_unreachable ();
7260 }
7261
7262 return ret;
7263 }
7264
7265 int
7266 alpha_pv_save_size (void)
7267 {
7268 alpha_sa_size ();
7269 return alpha_procedure_type == PT_STACK ? 8 : 0;
7270 }
7271
7272 int
7273 alpha_using_fp (void)
7274 {
7275 alpha_sa_size ();
7276 return vms_unwind_regno == HARD_FRAME_POINTER_REGNUM;
7277 }
7278
7279 #if TARGET_ABI_OPEN_VMS
7280
7281 const struct attribute_spec vms_attribute_table[] =
7282 {
7283 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7284 { "overlaid", 0, 0, true, false, false, NULL },
7285 { "global", 0, 0, true, false, false, NULL },
7286 { "initialize", 0, 0, true, false, false, NULL },
7287 { NULL, 0, 0, false, false, false, NULL }
7288 };
7289
7290 #endif
7291
7292 static int
7293 find_lo_sum_using_gp (rtx *px, void *data ATTRIBUTE_UNUSED)
7294 {
7295 return GET_CODE (*px) == LO_SUM && XEXP (*px, 0) == pic_offset_table_rtx;
7296 }
7297
7298 int
7299 alpha_find_lo_sum_using_gp (rtx insn)
7300 {
7301 return for_each_rtx (&PATTERN (insn), find_lo_sum_using_gp, NULL) > 0;
7302 }
7303
7304 static int
7305 alpha_does_function_need_gp (void)
7306 {
7307 rtx insn;
7308
7309 /* The GP being variable is an OSF abi thing. */
7310 if (! TARGET_ABI_OSF)
7311 return 0;
7312
7313 /* We need the gp to load the address of __mcount. */
7314 if (TARGET_PROFILING_NEEDS_GP && current_function_profile)
7315 return 1;
7316
7317 /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */
7318 if (current_function_is_thunk)
7319 return 1;
7320
7321 /* The nonlocal receiver pattern assumes that the gp is valid for
7322 the nested function. Reasonable because it's almost always set
7323 correctly already. For the cases where that's wrong, make sure
7324 the nested function loads its gp on entry. */
7325 if (current_function_has_nonlocal_goto)
7326 return 1;
7327
7328 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7329 Even if we are a static function, we still need to do this in case
7330 our address is taken and passed to something like qsort. */
7331
7332 push_topmost_sequence ();
7333 insn = get_insns ();
7334 pop_topmost_sequence ();
7335
7336 for (; insn; insn = NEXT_INSN (insn))
7337 if (INSN_P (insn)
7338 && ! JUMP_TABLE_DATA_P (insn)
7339 && GET_CODE (PATTERN (insn)) != USE
7340 && GET_CODE (PATTERN (insn)) != CLOBBER
7341 && get_attr_usegp (insn))
7342 return 1;
7343
7344 return 0;
7345 }
7346
7347 \f
7348 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7349 sequences. */
7350
7351 static rtx
7352 set_frame_related_p (void)
7353 {
7354 rtx seq = get_insns ();
7355 rtx insn;
7356
7357 end_sequence ();
7358
7359 if (!seq)
7360 return NULL_RTX;
7361
7362 if (INSN_P (seq))
7363 {
7364 insn = seq;
7365 while (insn != NULL_RTX)
7366 {
7367 RTX_FRAME_RELATED_P (insn) = 1;
7368 insn = NEXT_INSN (insn);
7369 }
7370 seq = emit_insn (seq);
7371 }
7372 else
7373 {
7374 seq = emit_insn (seq);
7375 RTX_FRAME_RELATED_P (seq) = 1;
7376 }
7377 return seq;
7378 }
7379
7380 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
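/* For example, FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx))
   emits the move inside its own sequence and marks every resulting insn
   RTX_FRAME_RELATED_P, so the DWARF CFI code sees the whole expansion.  */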
7381
7382 /* Generates a store with the proper unwind info attached. VALUE is
7383 stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG
7384 contains SP+FRAME_BIAS, and that is the unwind info that should be
7385 generated. If FRAME_REG != VALUE, then VALUE is being stored on
7386 behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */
7387
7388 static void
7389 emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7390 HOST_WIDE_INT base_ofs, rtx frame_reg)
7391 {
7392 rtx addr, mem, insn;
7393
7394 addr = plus_constant (base_reg, base_ofs);
7395 mem = gen_rtx_MEM (DImode, addr);
7396 set_mem_alias_set (mem, alpha_sr_alias_set);
7397
7398 insn = emit_move_insn (mem, value);
7399 RTX_FRAME_RELATED_P (insn) = 1;
7400
7401 if (frame_bias || value != frame_reg)
7402 {
7403 if (frame_bias)
7404 {
7405 addr = plus_constant (stack_pointer_rtx, frame_bias + base_ofs);
7406 mem = gen_rtx_MEM (DImode, addr);
7407 }
7408
7409 REG_NOTES (insn)
7410 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
7411 gen_rtx_SET (VOIDmode, mem, frame_reg),
7412 REG_NOTES (insn));
7413 }
7414 }
7415
7416 static void
7417 emit_frame_store (unsigned int regno, rtx base_reg,
7418 HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7419 {
7420 rtx reg = gen_rtx_REG (DImode, regno);
7421 emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7422 }
7423
7424 /* Write function prologue. */
7425
7426 /* On VMS we have two kinds of functions:
7427
7428 - stack frame (PT_STACK)
7429 these are 'normal' functions with local vars and which are
7430 calling other functions
7431 - register frame (PT_REGISTER)
7432 keeps all data in registers, needs no stack
7433
7434 We must pass this to the assembler so it can generate the
7435 proper pdsc (procedure descriptor).
7436 This is done with the '.pdesc' command.
7437
7438 On non-VMS targets, we don't really differentiate between the two, as we can
7439 simply allocate stack without saving registers. */
7440
7441 void
7442 alpha_expand_prologue (void)
7443 {
7444 /* Registers to save. */
7445 unsigned long imask = 0;
7446 unsigned long fmask = 0;
7447 /* Stack space needed for pushing registers clobbered by us. */
7448 HOST_WIDE_INT sa_size;
7449 /* Complete stack size needed. */
7450 HOST_WIDE_INT frame_size;
7451 /* Offset from base reg to register save area. */
7452 HOST_WIDE_INT reg_offset;
7453 rtx sa_reg;
7454 int i;
7455
7456 sa_size = alpha_sa_size ();
7457
7458 frame_size = get_frame_size ();
7459 if (TARGET_ABI_OPEN_VMS)
7460 frame_size = ALPHA_ROUND (sa_size
7461 + (alpha_procedure_type == PT_STACK ? 8 : 0)
7462 + frame_size
7463 + current_function_pretend_args_size);
7464 else if (TARGET_ABI_UNICOSMK)
7465 /* We have to allocate space for the DSIB if we generate a frame. */
7466 frame_size = ALPHA_ROUND (sa_size
7467 + (alpha_procedure_type == PT_STACK ? 48 : 0))
7468 + ALPHA_ROUND (frame_size
7469 + current_function_outgoing_args_size);
7470 else
7471 frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
7472 + sa_size
7473 + ALPHA_ROUND (frame_size
7474 + current_function_pretend_args_size));
7475
7476 if (TARGET_ABI_OPEN_VMS)
7477 reg_offset = 8;
7478 else
7479 reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
7480
7481 alpha_sa_mask (&imask, &fmask);
7482
7483 /* Emit an insn to reload GP, if needed. */
7484 if (TARGET_ABI_OSF)
7485 {
7486 alpha_function_needs_gp = alpha_does_function_need_gp ();
7487 if (alpha_function_needs_gp)
7488 emit_insn (gen_prologue_ldgp ());
7489 }
7490
7491 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7492 the call to mcount ourselves, rather than having the linker do it
7493 magically in response to -pg. Since _mcount has special linkage,
7494 don't represent the call as a call. */
7495 if (TARGET_PROFILING_NEEDS_GP && current_function_profile)
7496 emit_insn (gen_prologue_mcount ());
7497
7498 if (TARGET_ABI_UNICOSMK)
7499 unicosmk_gen_dsib (&imask);
7500
7501 /* Adjust the stack by the frame size. If the frame size is > 4096
7502 bytes, we need to be sure we probe somewhere in the first and last
7503 4096 bytes (we can probably get away without the latter test) and
7504 every 8192 bytes in between. If the frame size is > 32768, we
7505 do this in a loop. Otherwise, we generate the explicit probe
7506 instructions.
7507
7508 Note that we are only allowed to adjust sp once in the prologue. */
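  /* For illustration, with sa_size == 0 and frame_size == 10000 the code
     below emits a probe at sp - 4096, then (since 10000 > 8192) one more at
     sp - 10000, and finally a single stack adjustment of -10000.  */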
7509
7510 if (frame_size <= 32768)
7511 {
7512 if (frame_size > 4096)
7513 {
7514 int probed;
7515
7516 for (probed = 4096; probed < frame_size; probed += 8192)
7517 emit_insn (gen_probe_stack (GEN_INT (TARGET_ABI_UNICOSMK
7518 ? -probed + 64
7519 : -probed)));
7520
7521 /* We only have to do this probe if we aren't saving registers. */
7522 if (sa_size == 0 && frame_size > probed - 4096)
7523 emit_insn (gen_probe_stack (GEN_INT (-frame_size)));
7524 }
7525
7526 if (frame_size != 0)
7527 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7528 GEN_INT (TARGET_ABI_UNICOSMK
7529 ? -frame_size + 64
7530 : -frame_size))));
7531 }
7532 else
7533 {
7534 /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7535 number of 8192 byte blocks to probe. We then probe each block
7536 in the loop and then set SP to the proper location. If the
7537 amount remaining is > 4096, we have to do one more probe if we
7538 are not saving any registers. */
7539
7540 HOST_WIDE_INT blocks = (frame_size + 4096) / 8192;
7541 HOST_WIDE_INT leftover = frame_size + 4096 - blocks * 8192;
7542 rtx ptr = gen_rtx_REG (DImode, 22);
7543 rtx count = gen_rtx_REG (DImode, 23);
7544 rtx seq;
7545
7546 emit_move_insn (count, GEN_INT (blocks));
7547 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx,
7548 GEN_INT (TARGET_ABI_UNICOSMK ? 4096 - 64 : 4096)));
7549
7550 /* Because of the difficulty in emitting a new basic block this
7551 late in the compilation, generate the loop as a single insn. */
7552 emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7553
7554 if (leftover > 4096 && sa_size == 0)
7555 {
7556 rtx last = gen_rtx_MEM (DImode, plus_constant (ptr, -leftover));
7557 MEM_VOLATILE_P (last) = 1;
7558 emit_move_insn (last, const0_rtx);
7559 }
7560
7561 if (TARGET_ABI_WINDOWS_NT)
7562 {
7563 /* For NT stack unwind (done by 'reverse execution'), it's
7564 not OK to take the result of a loop, even though the value
7565 is already in ptr, so we reload it via a single operation
7566 and subtract it from sp.
7567
7568 Yes, that's correct -- we have to reload the whole constant
7569 into a temporary via ldah+lda then subtract from sp. */
7570
7571 HOST_WIDE_INT lo, hi;
7572 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7573 hi = frame_size - lo;
7574
7575 emit_move_insn (ptr, GEN_INT (hi));
7576 emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7577 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7578 ptr));
7579 }
7580 else
7581 {
7582 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7583 GEN_INT (-leftover)));
7584 }
7585
7586 /* This alternative is special, because the DWARF code cannot
7587 possibly intuit through the loop above. So we invent this
7588 note for it to look at instead. */
7589 RTX_FRAME_RELATED_P (seq) = 1;
7590 REG_NOTES (seq)
7591 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
7592 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
7593 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7594 GEN_INT (TARGET_ABI_UNICOSMK
7595 ? -frame_size + 64
7596 : -frame_size))),
7597 REG_NOTES (seq));
7598 }
7599
7600 if (!TARGET_ABI_UNICOSMK)
7601 {
7602 HOST_WIDE_INT sa_bias = 0;
7603
7604 /* Cope with very large offsets to the register save area. */
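      /* The idea is to split reg_offset into sa_bias plus a displacement
         small enough to fit in the signed 16-bit offset field of a memory
         reference, loading SP + sa_bias into $24 when necessary.  */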
7605 sa_reg = stack_pointer_rtx;
7606 if (reg_offset + sa_size > 0x8000)
7607 {
7608 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7609 rtx sa_bias_rtx;
7610
7611 if (low + sa_size <= 0x8000)
7612 sa_bias = reg_offset - low, reg_offset = low;
7613 else
7614 sa_bias = reg_offset, reg_offset = 0;
7615
7616 sa_reg = gen_rtx_REG (DImode, 24);
7617 sa_bias_rtx = GEN_INT (sa_bias);
7618
7619 if (add_operand (sa_bias_rtx, DImode))
7620 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7621 else
7622 {
7623 emit_move_insn (sa_reg, sa_bias_rtx);
7624 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7625 }
7626 }
7627
7628 /* Save regs in stack order. Beginning with VMS PV. */
7629 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7630 emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7631
7632 /* Save register RA next. */
7633 if (imask & (1UL << REG_RA))
7634 {
7635 emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset);
7636 imask &= ~(1UL << REG_RA);
7637 reg_offset += 8;
7638 }
7639
7640 /* Now save any other registers required to be saved. */
7641 for (i = 0; i < 31; i++)
7642 if (imask & (1UL << i))
7643 {
7644 emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7645 reg_offset += 8;
7646 }
7647
7648 for (i = 0; i < 31; i++)
7649 if (fmask & (1UL << i))
7650 {
7651 emit_frame_store (i+32, sa_reg, sa_bias, reg_offset);
7652 reg_offset += 8;
7653 }
7654 }
7655 else if (TARGET_ABI_UNICOSMK && alpha_procedure_type == PT_STACK)
7656 {
7657 /* The standard frame on the T3E includes space for saving registers.
7658 We just have to use it. We don't have to save the return address and
7659 the old frame pointer here - they are saved in the DSIB. */
7660
7661 reg_offset = -56;
7662 for (i = 9; i < 15; i++)
7663 if (imask & (1UL << i))
7664 {
7665 emit_frame_store (i, hard_frame_pointer_rtx, 0, reg_offset);
7666 reg_offset -= 8;
7667 }
7668 for (i = 2; i < 10; i++)
7669 if (fmask & (1UL << i))
7670 {
7671 emit_frame_store (i+32, hard_frame_pointer_rtx, 0, reg_offset);
7672 reg_offset -= 8;
7673 }
7674 }
7675
7676 if (TARGET_ABI_OPEN_VMS)
7677 {
7678 if (alpha_procedure_type == PT_REGISTER)
7679 /* Register frame procedures save the fp.
7680 ?? Ought to have a dwarf2 save for this. */
7681 emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7682 hard_frame_pointer_rtx);
7683
7684 if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7685 emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7686 gen_rtx_REG (DImode, REG_PV)));
7687
7688 if (alpha_procedure_type != PT_NULL
7689 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7690 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7691
7692 /* If we have to allocate space for outgoing args, do it now. */
7693 if (current_function_outgoing_args_size != 0)
7694 {
7695 rtx seq
7696 = emit_move_insn (stack_pointer_rtx,
7697 plus_constant
7698 (hard_frame_pointer_rtx,
7699 - (ALPHA_ROUND
7700 (current_function_outgoing_args_size))));
7701
7702 /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7703 if ! frame_pointer_needed. Setting the bit will change the CFA
7704 computation rule to use sp again, which would be wrong if we had
7705 frame_pointer_needed, as this means sp might move unpredictably
7706 later on.
7707
7708 Also, note that
7709 frame_pointer_needed
7710 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7711 and
7712 current_function_outgoing_args_size != 0
7713 => alpha_procedure_type != PT_NULL,
7714
7715 so when we are not setting the bit here, we are guaranteed to
7716 have emitted an FRP frame pointer update just before. */
7717 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
7718 }
7719 }
7720 else if (!TARGET_ABI_UNICOSMK)
7721 {
7722 /* If we need a frame pointer, set it from the stack pointer. */
7723 if (frame_pointer_needed)
7724 {
7725 if (TARGET_CAN_FAULT_IN_PROLOGUE)
7726 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7727 else
7728 /* This must always be the last instruction in the
7729 prologue, thus we emit a special move + clobber. */
7730 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
7731 stack_pointer_rtx, sa_reg)));
7732 }
7733 }
7734
7735 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
7736 the prologue, for exception handling reasons, we cannot do this for
7737 any insn that might fault. We could prevent this for mems with a
7738 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
7739 have to prevent all such scheduling with a blockage.
7740
7741 Linux, on the other hand, never bothered to implement OSF/1's
7742 exception handling, and so doesn't care about such things. Anyone
7743 planning to use dwarf2 frame-unwind info can also omit the blockage. */
7744
7745 if (! TARGET_CAN_FAULT_IN_PROLOGUE)
7746 emit_insn (gen_blockage ());
7747 }
7748
7749 /* Count the number of .file directives, so that .loc is up to date. */
7750 int num_source_filenames = 0;
7751
7752 /* Output the textual info surrounding the prologue. */
7753
7754 void
7755 alpha_start_function (FILE *file, const char *fnname,
7756 tree decl ATTRIBUTE_UNUSED)
7757 {
7758 unsigned long imask = 0;
7759 unsigned long fmask = 0;
7760 /* Stack space needed for pushing registers clobbered by us. */
7761 HOST_WIDE_INT sa_size;
7762 /* Complete stack size needed. */
7763 unsigned HOST_WIDE_INT frame_size;
7764 /* The maximum debuggable frame size (512 Kbytes using Tru64 as). */
7765 unsigned HOST_WIDE_INT max_frame_size = TARGET_ABI_OSF && !TARGET_GAS
7766 ? 524288
7767 : 1UL << 31;
7768 /* Offset from base reg to register save area. */
7769 HOST_WIDE_INT reg_offset;
7770 char *entry_label = (char *) alloca (strlen (fnname) + 6);
7771 int i;
7772
7773 /* Don't emit an extern directive for functions defined in the same file. */
7774 if (TARGET_ABI_UNICOSMK)
7775 {
7776 tree name_tree;
7777 name_tree = get_identifier (fnname);
7778 TREE_ASM_WRITTEN (name_tree) = 1;
7779 }
7780
7781 alpha_fnname = fnname;
7782 sa_size = alpha_sa_size ();
7783
7784 frame_size = get_frame_size ();
7785 if (TARGET_ABI_OPEN_VMS)
7786 frame_size = ALPHA_ROUND (sa_size
7787 + (alpha_procedure_type == PT_STACK ? 8 : 0)
7788 + frame_size
7789 + current_function_pretend_args_size);
7790 else if (TARGET_ABI_UNICOSMK)
7791 frame_size = ALPHA_ROUND (sa_size
7792 + (alpha_procedure_type == PT_STACK ? 48 : 0))
7793 + ALPHA_ROUND (frame_size
7794 + current_function_outgoing_args_size);
7795 else
7796 frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
7797 + sa_size
7798 + ALPHA_ROUND (frame_size
7799 + current_function_pretend_args_size));
7800
7801 if (TARGET_ABI_OPEN_VMS)
7802 reg_offset = 8;
7803 else
7804 reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
7805
7806 alpha_sa_mask (&imask, &fmask);
7807
7808 /* Ecoff can handle multiple .file directives, so put out file and lineno.
7809 We have to do that before the .ent directive as we cannot switch
7810 files within procedures with native ecoff because line numbers are
7811 linked to procedure descriptors.
7812 Outputting the lineno helps debugging of one line functions as they
7813 would otherwise get no line number at all. Please note that we would
7814 like to put out last_linenum from final.c, but it is not accessible. */
7815
7816 if (write_symbols == SDB_DEBUG)
7817 {
7818 #ifdef ASM_OUTPUT_SOURCE_FILENAME
7819 ASM_OUTPUT_SOURCE_FILENAME (file,
7820 DECL_SOURCE_FILE (current_function_decl));
7821 #endif
7822 #ifdef SDB_OUTPUT_SOURCE_LINE
7823 if (debug_info_level != DINFO_LEVEL_TERSE)
7824 SDB_OUTPUT_SOURCE_LINE (file,
7825 DECL_SOURCE_LINE (current_function_decl));
7826 #endif
7827 }
7828
7829 /* Issue function start and label. */
7830 if (TARGET_ABI_OPEN_VMS
7831 || (!TARGET_ABI_UNICOSMK && !flag_inhibit_size_directive))
7832 {
7833 fputs ("\t.ent ", file);
7834 assemble_name (file, fnname);
7835 putc ('\n', file);
7836
7837 /* If the function needs GP, we'll write the "..ng" label there.
7838 Otherwise, do it here. */
7839 if (TARGET_ABI_OSF
7840 && ! alpha_function_needs_gp
7841 && ! current_function_is_thunk)
7842 {
7843 putc ('$', file);
7844 assemble_name (file, fnname);
7845 fputs ("..ng:\n", file);
7846 }
7847 }
7848
7849 strcpy (entry_label, fnname);
7850 if (TARGET_ABI_OPEN_VMS)
7851 strcat (entry_label, "..en");
7852
7853 /* For public functions, the label must be globalized by appending an
7854 additional colon. */
7855 if (TARGET_ABI_UNICOSMK && TREE_PUBLIC (decl))
7856 strcat (entry_label, ":");
7857
7858 ASM_OUTPUT_LABEL (file, entry_label);
7859 inside_function = TRUE;
7860
7861 if (TARGET_ABI_OPEN_VMS)
7862 fprintf (file, "\t.base $%d\n", vms_base_regno);
7863
7864 if (!TARGET_ABI_OPEN_VMS && !TARGET_ABI_UNICOSMK && TARGET_IEEE_CONFORMANT
7865 && !flag_inhibit_size_directive)
7866 {
7867 /* Set flags in procedure descriptor to request IEEE-conformant
7868 math-library routines. The value we set it to is PDSC_EXC_IEEE
7869 (/usr/include/pdsc.h). */
7870 fputs ("\t.eflag 48\n", file);
7871 }
7872
7873 /* Set up offsets to alpha virtual arg/local debugging pointer. */
7874 alpha_auto_offset = -frame_size + current_function_pretend_args_size;
7875 alpha_arg_offset = -frame_size + 48;
7876
7877 /* Describe our frame. If the frame size is larger than an integer,
7878 print it as zero to avoid an assembler error. We won't be
7879 properly describing such a frame, but that's the best we can do. */
7880 if (TARGET_ABI_UNICOSMK)
7881 ;
7882 else if (TARGET_ABI_OPEN_VMS)
7883 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
7884 HOST_WIDE_INT_PRINT_DEC "\n",
7885 vms_unwind_regno,
7886 frame_size >= (1UL << 31) ? 0 : frame_size,
7887 reg_offset);
7888 else if (!flag_inhibit_size_directive)
7889 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
7890 (frame_pointer_needed
7891 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
7892 frame_size >= max_frame_size ? 0 : frame_size,
7893 current_function_pretend_args_size);
7894
7895 /* Describe which registers were spilled. */
7896 if (TARGET_ABI_UNICOSMK)
7897 ;
7898 else if (TARGET_ABI_OPEN_VMS)
7899 {
7900 if (imask)
7901 /* ??? Does VMS care if mask contains ra? The old code didn't
7902 set it, so we don't set it here either. */
7903 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
7904 if (fmask)
7905 fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
7906 if (alpha_procedure_type == PT_REGISTER)
7907 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
7908 }
7909 else if (!flag_inhibit_size_directive)
7910 {
7911 if (imask)
7912 {
7913 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
7914 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
7915
7916 for (i = 0; i < 32; ++i)
7917 if (imask & (1UL << i))
7918 reg_offset += 8;
7919 }
7920
7921 if (fmask)
7922 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
7923 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
7924 }
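/* Illustrative example only (assumed values, not taken from a real build):
   with $9 and $26 in IMASK and a 96-byte frame, the directive emitted
   above would read ".mask 0x4000200,-96", i.e. the saved-register bit
   mask followed by reg_offset - frame_size.  */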
7925
7926 #if TARGET_ABI_OPEN_VMS
7927 /* Ifdef'ed because the link section is only available then. */
7928 switch_to_section (readonly_data_section);
7929 fprintf (file, "\t.align 3\n");
7930 assemble_name (file, fnname); fputs ("..na:\n", file);
7931 fputs ("\t.ascii \"", file);
7932 assemble_name (file, fnname);
7933 fputs ("\\0\"\n", file);
7934 alpha_need_linkage (fnname, 1);
7935 switch_to_section (text_section);
7936 #endif
7937 }
7938
7939 /* Emit the .prologue note at the scheduled end of the prologue. */
7940
7941 static void
7942 alpha_output_function_end_prologue (FILE *file)
7943 {
7944 if (TARGET_ABI_UNICOSMK)
7945 ;
7946 else if (TARGET_ABI_OPEN_VMS)
7947 fputs ("\t.prologue\n", file);
7948 else if (TARGET_ABI_WINDOWS_NT)
7949 fputs ("\t.prologue 0\n", file);
7950 else if (!flag_inhibit_size_directive)
7951 fprintf (file, "\t.prologue %d\n",
7952 alpha_function_needs_gp || current_function_is_thunk);
7953 }
7954
7955 /* Write function epilogue. */
7956
7957 /* ??? At some point we will want to support full unwind, and so will
7958 need to mark the epilogue as well. At the moment, we just confuse
7959 dwarf2out. */
7960 #undef FRP
7961 #define FRP(exp) exp
7962
7963 void
7964 alpha_expand_epilogue (void)
7965 {
7966 /* Registers to save. */
7967 unsigned long imask = 0;
7968 unsigned long fmask = 0;
7969 /* Stack space needed for pushing registers clobbered by us. */
7970 HOST_WIDE_INT sa_size;
7971 /* Complete stack size needed. */
7972 HOST_WIDE_INT frame_size;
7973 /* Offset from base reg to register save area. */
7974 HOST_WIDE_INT reg_offset;
7975 int fp_is_frame_pointer, fp_offset;
7976 rtx sa_reg, sa_reg_exp = NULL;
7977 rtx sp_adj1, sp_adj2, mem;
7978 rtx eh_ofs;
7979 int i;
7980
7981 sa_size = alpha_sa_size ();
7982
7983 frame_size = get_frame_size ();
7984 if (TARGET_ABI_OPEN_VMS)
7985 frame_size = ALPHA_ROUND (sa_size
7986 + (alpha_procedure_type == PT_STACK ? 8 : 0)
7987 + frame_size
7988 + current_function_pretend_args_size);
7989 else if (TARGET_ABI_UNICOSMK)
7990 frame_size = ALPHA_ROUND (sa_size
7991 + (alpha_procedure_type == PT_STACK ? 48 : 0))
7992 + ALPHA_ROUND (frame_size
7993 + current_function_outgoing_args_size);
7994 else
7995 frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
7996 + sa_size
7997 + ALPHA_ROUND (frame_size
7998 + current_function_pretend_args_size));
7999
8000 if (TARGET_ABI_OPEN_VMS)
8001 {
8002 if (alpha_procedure_type == PT_STACK)
8003 reg_offset = 8;
8004 else
8005 reg_offset = 0;
8006 }
8007 else
8008 reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
8009
8010 alpha_sa_mask (&imask, &fmask);
8011
8012 fp_is_frame_pointer
8013 = ((TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
8014 || (!TARGET_ABI_OPEN_VMS && frame_pointer_needed));
8015 fp_offset = 0;
8016 sa_reg = stack_pointer_rtx;
8017
8018 if (current_function_calls_eh_return)
8019 eh_ofs = EH_RETURN_STACKADJ_RTX;
8020 else
8021 eh_ofs = NULL_RTX;
8022
8023 if (!TARGET_ABI_UNICOSMK && sa_size)
8024 {
8025 /* If we have a frame pointer, restore SP from it. */
8026 if ((TARGET_ABI_OPEN_VMS
8027 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
8028 || (!TARGET_ABI_OPEN_VMS && frame_pointer_needed))
8029 FRP (emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx));
8030
8031 /* Cope with very large offsets to the register save area. */
8032 if (reg_offset + sa_size > 0x8000)
8033 {
8034 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8035 HOST_WIDE_INT bias;
8036
8037 if (low + sa_size <= 0x8000)
8038 bias = reg_offset - low, reg_offset = low;
8039 else
8040 bias = reg_offset, reg_offset = 0;
8041
8042 sa_reg = gen_rtx_REG (DImode, 22);
8043 sa_reg_exp = plus_constant (stack_pointer_rtx, bias);
8044
8045 FRP (emit_move_insn (sa_reg, sa_reg_exp));
8046 }
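/* Worked example for the split above (illustrative values only): with
   reg_offset = 0x12345 and sa_size = 0x100, low is the sign-extended
   bottom 16 bits, 0x2345; low + sa_size still fits in a signed 16-bit
   displacement, so bias = 0x10000 goes into $22 and every register is
   then restored via small offsets from that biased base.  */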
8047
8048 /* Restore registers in order, excepting a true frame pointer. */
8049
8050 mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset));
8051 if (! eh_ofs)
8052 set_mem_alias_set (mem, alpha_sr_alias_set);
8053 FRP (emit_move_insn (gen_rtx_REG (DImode, REG_RA), mem));
8054
8055 reg_offset += 8;
8056 imask &= ~(1UL << REG_RA);
8057
8058 for (i = 0; i < 31; ++i)
8059 if (imask & (1UL << i))
8060 {
8061 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8062 fp_offset = reg_offset;
8063 else
8064 {
8065 mem = gen_rtx_MEM (DImode, plus_constant(sa_reg, reg_offset));
8066 set_mem_alias_set (mem, alpha_sr_alias_set);
8067 FRP (emit_move_insn (gen_rtx_REG (DImode, i), mem));
8068 }
8069 reg_offset += 8;
8070 }
8071
8072 for (i = 0; i < 31; ++i)
8073 if (fmask & (1UL << i))
8074 {
8075 mem = gen_rtx_MEM (DFmode, plus_constant(sa_reg, reg_offset));
8076 set_mem_alias_set (mem, alpha_sr_alias_set);
8077 FRP (emit_move_insn (gen_rtx_REG (DFmode, i+32), mem));
8078 reg_offset += 8;
8079 }
8080 }
8081 else if (TARGET_ABI_UNICOSMK && alpha_procedure_type == PT_STACK)
8082 {
8083 /* Restore callee-saved general-purpose registers. */
8084
8085 reg_offset = -56;
8086
8087 for (i = 9; i < 15; i++)
8088 if (imask & (1UL << i))
8089 {
8090 mem = gen_rtx_MEM (DImode, plus_constant(hard_frame_pointer_rtx,
8091 reg_offset));
8092 set_mem_alias_set (mem, alpha_sr_alias_set);
8093 FRP (emit_move_insn (gen_rtx_REG (DImode, i), mem));
8094 reg_offset -= 8;
8095 }
8096
8097 for (i = 2; i < 10; i++)
8098 if (fmask & (1UL << i))
8099 {
8100 mem = gen_rtx_MEM (DFmode, plus_constant(hard_frame_pointer_rtx,
8101 reg_offset));
8102 set_mem_alias_set (mem, alpha_sr_alias_set);
8103 FRP (emit_move_insn (gen_rtx_REG (DFmode, i+32), mem));
8104 reg_offset -= 8;
8105 }
8106
8107 /* Restore the return address from the DSIB. */
8108
8109 mem = gen_rtx_MEM (DImode, plus_constant(hard_frame_pointer_rtx, -8));
8110 set_mem_alias_set (mem, alpha_sr_alias_set);
8111 FRP (emit_move_insn (gen_rtx_REG (DImode, REG_RA), mem));
8112 }
8113
8114 if (frame_size || eh_ofs)
8115 {
8116 sp_adj1 = stack_pointer_rtx;
8117
8118 if (eh_ofs)
8119 {
8120 sp_adj1 = gen_rtx_REG (DImode, 23);
8121 emit_move_insn (sp_adj1,
8122 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8123 }
8124
8125 /* If the stack size is large, begin computation into a temporary
8126 register so as not to interfere with a potential fp restore,
8127 which must be consecutive with an SP restore. */
8128 if (frame_size < 32768
8129 && ! (TARGET_ABI_UNICOSMK && current_function_calls_alloca))
8130 sp_adj2 = GEN_INT (frame_size);
8131 else if (TARGET_ABI_UNICOSMK)
8132 {
8133 sp_adj1 = gen_rtx_REG (DImode, 23);
8134 FRP (emit_move_insn (sp_adj1, hard_frame_pointer_rtx));
8135 sp_adj2 = const0_rtx;
8136 }
8137 else if (frame_size < 0x40007fffL)
8138 {
8139 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8140
8141 sp_adj2 = plus_constant (sp_adj1, frame_size - low);
8142 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8143 sp_adj1 = sa_reg;
8144 else
8145 {
8146 sp_adj1 = gen_rtx_REG (DImode, 23);
8147 FRP (emit_move_insn (sp_adj1, sp_adj2));
8148 }
8149 sp_adj2 = GEN_INT (low);
8150 }
8151 else
8152 {
8153 rtx tmp = gen_rtx_REG (DImode, 23);
8154 FRP (sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size,
8155 3, false));
8156 if (!sp_adj2)
8157 {
8158 /* We can't drop new things to memory this late, afaik,
8159 so build it up by pieces. */
8160 FRP (sp_adj2 = alpha_emit_set_long_const (tmp, frame_size,
8161 -(frame_size < 0)));
8162 gcc_assert (sp_adj2);
8163 }
8164 }
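/* Worked example for the frame_size < 0x40007fff case above (illustrative
   values only): frame_size = 0x12340 gives low = 0x2340, so $23 is first
   set to sp + 0x10000 and the final SP restore then adds the remaining
   0x2340; both pieces fit the signed 16-bit immediates of ldah/lda.  */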
8165
8166 /* From now on, things must be in order. So emit blockages. */
8167
8168 /* Restore the frame pointer. */
8169 if (TARGET_ABI_UNICOSMK)
8170 {
8171 emit_insn (gen_blockage ());
8172 mem = gen_rtx_MEM (DImode,
8173 plus_constant (hard_frame_pointer_rtx, -16));
8174 set_mem_alias_set (mem, alpha_sr_alias_set);
8175 FRP (emit_move_insn (hard_frame_pointer_rtx, mem));
8176 }
8177 else if (fp_is_frame_pointer)
8178 {
8179 emit_insn (gen_blockage ());
8180 mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, fp_offset));
8181 set_mem_alias_set (mem, alpha_sr_alias_set);
8182 FRP (emit_move_insn (hard_frame_pointer_rtx, mem));
8183 }
8184 else if (TARGET_ABI_OPEN_VMS)
8185 {
8186 emit_insn (gen_blockage ());
8187 FRP (emit_move_insn (hard_frame_pointer_rtx,
8188 gen_rtx_REG (DImode, vms_save_fp_regno)));
8189 }
8190
8191 /* Restore the stack pointer. */
8192 emit_insn (gen_blockage ());
8193 if (sp_adj2 == const0_rtx)
8194 FRP (emit_move_insn (stack_pointer_rtx, sp_adj1));
8195 else
8196 FRP (emit_move_insn (stack_pointer_rtx,
8197 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2)));
8198 }
8199 else
8200 {
8201 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8202 {
8203 emit_insn (gen_blockage ());
8204 FRP (emit_move_insn (hard_frame_pointer_rtx,
8205 gen_rtx_REG (DImode, vms_save_fp_regno)));
8206 }
8207 else if (TARGET_ABI_UNICOSMK && alpha_procedure_type != PT_STACK)
8208 {
8209 /* Decrement the frame pointer if the function does not have a
8210 frame. */
8211
8212 emit_insn (gen_blockage ());
8213 FRP (emit_insn (gen_adddi3 (hard_frame_pointer_rtx,
8214 hard_frame_pointer_rtx, constm1_rtx)));
8215 }
8216 }
8217 }
8218 \f
8219 /* Output the rest of the textual info surrounding the epilogue. */
8220
8221 void
8222 alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8223 {
8224 #if TARGET_ABI_OPEN_VMS
8225 alpha_write_linkage (file, fnname, decl);
8226 #endif
8227
8228 /* End the function. */
8229 if (!TARGET_ABI_UNICOSMK && !flag_inhibit_size_directive)
8230 {
8231 fputs ("\t.end ", file);
8232 assemble_name (file, fnname);
8233 putc ('\n', file);
8234 }
8235 inside_function = FALSE;
8236
8237 /* Output jump tables and the static subroutine information block. */
8238 if (TARGET_ABI_UNICOSMK)
8239 {
8240 unicosmk_output_ssib (file, fnname);
8241 unicosmk_output_deferred_case_vectors (file);
8242 }
8243 }
8244
8245 #if TARGET_ABI_OSF
8246 /* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8247
8248 In order to avoid the hordes of differences between generated code
8249 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8250 lots of code loading up large constants, generate rtl and emit it
8251 instead of going straight to text.
8252
8253 Not sure why this idea hasn't been explored before... */
8254
8255 static void
8256 alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8257 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8258 tree function)
8259 {
8260 HOST_WIDE_INT hi, lo;
8261 rtx this, insn, funexp;
8262
8263 reset_block_changes ();
8264
8265 /* We always require a valid GP. */
8266 emit_insn (gen_prologue_ldgp ());
8267 emit_note (NOTE_INSN_PROLOGUE_END);
8268
8269 /* Find the "this" pointer. If the function returns a structure,
8270 the structure return pointer is in $16. */
8271 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8272 this = gen_rtx_REG (Pmode, 17);
8273 else
8274 this = gen_rtx_REG (Pmode, 16);
8275
8276 /* Add DELTA. When possible we use ldah+lda. Otherwise load the
8277 entire constant for the add. */
8278 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8279 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8280 if (hi + lo == delta)
8281 {
8282 if (hi)
8283 emit_insn (gen_adddi3 (this, this, GEN_INT (hi)));
8284 if (lo)
8285 emit_insn (gen_adddi3 (this, this, GEN_INT (lo)));
8286 }
8287 else
8288 {
8289 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0),
8290 delta, -(delta < 0));
8291 emit_insn (gen_adddi3 (this, this, tmp));
8292 }
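/* Worked example for the ldah/lda split above (illustrative delta):
   delta = 0x18008 gives lo = 8 - 0x8000 = -32760 and hi = 0x20000,
   and hi + lo == delta, so the adjustment is done as an ldah-style
   high add plus an lda-style low add rather than loading the whole
   constant.  */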
8293
8294 /* Add a delta stored in the vtable at VCALL_OFFSET. */
8295 if (vcall_offset)
8296 {
8297 rtx tmp, tmp2;
8298
8299 tmp = gen_rtx_REG (Pmode, 0);
8300 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8301
8302 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8303 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8304 if (hi + lo == vcall_offset)
8305 {
8306 if (hi)
8307 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8308 }
8309 else
8310 {
8311 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8312 vcall_offset, -(vcall_offset < 0));
8313 emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8314 lo = 0;
8315 }
8316 if (lo)
8317 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8318 else
8319 tmp2 = tmp;
8320 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8321
8322 emit_insn (gen_adddi3 (this, this, tmp));
8323 }
8324
8325 /* Generate a tail call to the target function. */
8326 if (! TREE_USED (function))
8327 {
8328 assemble_external (function);
8329 TREE_USED (function) = 1;
8330 }
8331 funexp = XEXP (DECL_RTL (function), 0);
8332 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8333 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8334 SIBLING_CALL_P (insn) = 1;
8335
8336 /* Run just enough of rest_of_compilation to get the insns emitted.
8337 There's not really enough bulk here to make other passes such as
8338 instruction scheduling worth while. Note that use_thunk calls
8339 assemble_start_function and assemble_end_function. */
8340 insn = get_insns ();
8341 insn_locators_initialize ();
8342 shorten_branches (insn);
8343 final_start_function (insn, file, 1);
8344 final (insn, file, 1);
8345 final_end_function ();
8346 }
8347 #endif /* TARGET_ABI_OSF */
8348 \f
8349 /* Debugging support. */
8350
8351 #include "gstab.h"
8352
8353 /* Count the number of sdb-related labels that are generated (to find
8354 block start and end boundaries). */
8355
8356 int sdb_label_count = 0;
8357
8358 /* Name of the file containing the current function. */
8359
8360 static const char *current_function_file = "";
8361
8362 /* Offsets to alpha virtual arg/local debugging pointers. */
8363
8364 long alpha_arg_offset;
8365 long alpha_auto_offset;
8366 \f
8367 /* Emit a new filename to a stream. */
8368
8369 void
8370 alpha_output_filename (FILE *stream, const char *name)
8371 {
8372 static int first_time = TRUE;
8373
8374 if (first_time)
8375 {
8376 first_time = FALSE;
8377 ++num_source_filenames;
8378 current_function_file = name;
8379 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8380 output_quoted_string (stream, name);
8381 fprintf (stream, "\n");
8382 if (!TARGET_GAS && write_symbols == DBX_DEBUG)
8383 fprintf (stream, "\t#@stabs\n");
8384 }
8385
8386 else if (write_symbols == DBX_DEBUG)
8387 /* dbxout.c will emit an appropriate .stabs directive. */
8388 return;
8389
8390 else if (name != current_function_file
8391 && strcmp (name, current_function_file) != 0)
8392 {
8393 if (inside_function && ! TARGET_GAS)
8394 fprintf (stream, "\t#.file\t%d ", num_source_filenames);
8395 else
8396 {
8397 ++num_source_filenames;
8398 current_function_file = name;
8399 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8400 }
8401
8402 output_quoted_string (stream, name);
8403 fprintf (stream, "\n");
8404 }
8405 }
8406 \f
8407 /* Structure to show the current status of registers and memory. */
8408
8409 struct shadow_summary
8410 {
8411 struct {
8412 unsigned int i : 31; /* Mask of int regs */
8413 unsigned int fp : 31; /* Mask of fp regs */
8414 unsigned int mem : 1; /* mem == imem | fpmem */
8415 } used, defd;
8416 };
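/* For illustration (assumed example, matching the struct above): an insn
   such as "addq $1,$2,$3" would set bits 1 and 2 in used.i and bit 3 in
   defd.i, while a load or store would also set the corresponding mem bit. */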
8417
8418 /* Summarize the effects of expression X on the machine. Update SUM, a pointer
8419 to the summary structure. SET is nonzero if the insn is setting the
8420 object, otherwise zero. */
8421
8422 static void
8423 summarize_insn (rtx x, struct shadow_summary *sum, int set)
8424 {
8425 const char *format_ptr;
8426 int i, j;
8427
8428 if (x == 0)
8429 return;
8430
8431 switch (GET_CODE (x))
8432 {
8433 /* ??? Note that this case would be incorrect if the Alpha had a
8434 ZERO_EXTRACT in SET_DEST. */
8435 case SET:
8436 summarize_insn (SET_SRC (x), sum, 0);
8437 summarize_insn (SET_DEST (x), sum, 1);
8438 break;
8439
8440 case CLOBBER:
8441 summarize_insn (XEXP (x, 0), sum, 1);
8442 break;
8443
8444 case USE:
8445 summarize_insn (XEXP (x, 0), sum, 0);
8446 break;
8447
8448 case ASM_OPERANDS:
8449 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8450 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8451 break;
8452
8453 case PARALLEL:
8454 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8455 summarize_insn (XVECEXP (x, 0, i), sum, 0);
8456 break;
8457
8458 case SUBREG:
8459 summarize_insn (SUBREG_REG (x), sum, 0);
8460 break;
8461
8462 case REG:
8463 {
8464 int regno = REGNO (x);
8465 unsigned long mask = ((unsigned long) 1) << (regno % 32);
8466
8467 if (regno == 31 || regno == 63)
8468 break;
8469
8470 if (set)
8471 {
8472 if (regno < 32)
8473 sum->defd.i |= mask;
8474 else
8475 sum->defd.fp |= mask;
8476 }
8477 else
8478 {
8479 if (regno < 32)
8480 sum->used.i |= mask;
8481 else
8482 sum->used.fp |= mask;
8483 }
8484 }
8485 break;
8486
8487 case MEM:
8488 if (set)
8489 sum->defd.mem = 1;
8490 else
8491 sum->used.mem = 1;
8492
8493 /* Find the regs used in memory address computation: */
8494 summarize_insn (XEXP (x, 0), sum, 0);
8495 break;
8496
8497 case CONST_INT: case CONST_DOUBLE:
8498 case SYMBOL_REF: case LABEL_REF: case CONST:
8499 case SCRATCH: case ASM_INPUT:
8500 break;
8501
8502 /* Handle common unary and binary ops for efficiency. */
8503 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
8504 case MOD: case UDIV: case UMOD: case AND: case IOR:
8505 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
8506 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
8507 case NE: case EQ: case GE: case GT: case LE:
8508 case LT: case GEU: case GTU: case LEU: case LTU:
8509 summarize_insn (XEXP (x, 0), sum, 0);
8510 summarize_insn (XEXP (x, 1), sum, 0);
8511 break;
8512
8513 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
8514 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
8515 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
8516 case SQRT: case FFS:
8517 summarize_insn (XEXP (x, 0), sum, 0);
8518 break;
8519
8520 default:
8521 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8522 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8523 switch (format_ptr[i])
8524 {
8525 case 'e':
8526 summarize_insn (XEXP (x, i), sum, 0);
8527 break;
8528
8529 case 'E':
8530 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8531 summarize_insn (XVECEXP (x, i, j), sum, 0);
8532 break;
8533
8534 case 'i':
8535 break;
8536
8537 default:
8538 gcc_unreachable ();
8539 }
8540 }
8541 }
8542
8543 /* Ensure a sufficient number of `trapb' insns are in the code when
8544 the user requests code with a trap precision of functions or
8545 instructions.
8546
8547 In naive mode, when the user requests a trap-precision of
8548 "instruction", a trapb is needed after every instruction that may
8549 generate a trap. This ensures that the code is resumption safe but
8550 it is also slow.
8551
8552 When optimizations are turned on, we delay issuing a trapb as long
8553 as possible. In this context, a trap shadow is the sequence of
8554 instructions that starts with a (potentially) trap generating
8555 instruction and extends to the next trapb or call_pal instruction
8556 (but GCC never generates call_pal by itself). We can delay (and
8557 therefore sometimes omit) a trapb subject to the following
8558 conditions:
8559
8560 (a) On entry to the trap shadow, if any Alpha register or memory
8561 location contains a value that is used as an operand value by some
8562 instruction in the trap shadow (live on entry), then no instruction
8563 in the trap shadow may modify the register or memory location.
8564
8565 (b) Within the trap shadow, the computation of the base register
8566 for a memory load or store instruction may not involve using the
8567 result of an instruction that might generate an UNPREDICTABLE
8568 result.
8569
8570 (c) Within the trap shadow, no register may be used more than once
8571 as a destination register. (This is to make life easier for the
8572 trap-handler.)
8573
8574 (d) The trap shadow may not include any branch instructions. */
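/* Illustrative example (not from the source): with -mtrap-precision=i and
   optimization on, a "divt $f1,$f2,$f3" opens a trap shadow; later insns
   can be slotted into the shadow until one would, say, write $f3 a second
   time (violating (c)) or overwrite an operand already used inside the
   shadow (violating (a)), at which point a trapb is emitted to close it. */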
8575
8576 static void
8577 alpha_handle_trap_shadows (void)
8578 {
8579 struct shadow_summary shadow;
8580 int trap_pending, exception_nesting;
8581 rtx i, n;
8582
8583 trap_pending = 0;
8584 exception_nesting = 0;
8585 shadow.used.i = 0;
8586 shadow.used.fp = 0;
8587 shadow.used.mem = 0;
8588 shadow.defd = shadow.used;
8589
8590 for (i = get_insns (); i ; i = NEXT_INSN (i))
8591 {
8592 if (GET_CODE (i) == NOTE)
8593 {
8594 switch (NOTE_LINE_NUMBER (i))
8595 {
8596 case NOTE_INSN_EH_REGION_BEG:
8597 exception_nesting++;
8598 if (trap_pending)
8599 goto close_shadow;
8600 break;
8601
8602 case NOTE_INSN_EH_REGION_END:
8603 exception_nesting--;
8604 if (trap_pending)
8605 goto close_shadow;
8606 break;
8607
8608 case NOTE_INSN_EPILOGUE_BEG:
8609 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8610 goto close_shadow;
8611 break;
8612 }
8613 }
8614 else if (trap_pending)
8615 {
8616 if (alpha_tp == ALPHA_TP_FUNC)
8617 {
8618 if (GET_CODE (i) == JUMP_INSN
8619 && GET_CODE (PATTERN (i)) == RETURN)
8620 goto close_shadow;
8621 }
8622 else if (alpha_tp == ALPHA_TP_INSN)
8623 {
8624 if (optimize > 0)
8625 {
8626 struct shadow_summary sum;
8627
8628 sum.used.i = 0;
8629 sum.used.fp = 0;
8630 sum.used.mem = 0;
8631 sum.defd = sum.used;
8632
8633 switch (GET_CODE (i))
8634 {
8635 case INSN:
8636 /* Annoyingly, get_attr_trap will die on these. */
8637 if (GET_CODE (PATTERN (i)) == USE
8638 || GET_CODE (PATTERN (i)) == CLOBBER)
8639 break;
8640
8641 summarize_insn (PATTERN (i), &sum, 0);
8642
8643 if ((sum.defd.i & shadow.defd.i)
8644 || (sum.defd.fp & shadow.defd.fp))
8645 {
8646 /* (c) would be violated */
8647 goto close_shadow;
8648 }
8649
8650 /* Combine shadow with summary of current insn: */
8651 shadow.used.i |= sum.used.i;
8652 shadow.used.fp |= sum.used.fp;
8653 shadow.used.mem |= sum.used.mem;
8654 shadow.defd.i |= sum.defd.i;
8655 shadow.defd.fp |= sum.defd.fp;
8656 shadow.defd.mem |= sum.defd.mem;
8657
8658 if ((sum.defd.i & shadow.used.i)
8659 || (sum.defd.fp & shadow.used.fp)
8660 || (sum.defd.mem & shadow.used.mem))
8661 {
8662 /* (a) would be violated (also takes care of (b)) */
8663 gcc_assert (get_attr_trap (i) != TRAP_YES
8664 || (!(sum.defd.i & sum.used.i)
8665 && !(sum.defd.fp & sum.used.fp)));
8666
8667 goto close_shadow;
8668 }
8669 break;
8670
8671 case JUMP_INSN:
8672 case CALL_INSN:
8673 case CODE_LABEL:
8674 goto close_shadow;
8675
8676 default:
8677 gcc_unreachable ();
8678 }
8679 }
8680 else
8681 {
8682 close_shadow:
8683 n = emit_insn_before (gen_trapb (), i);
8684 PUT_MODE (n, TImode);
8685 PUT_MODE (i, TImode);
8686 trap_pending = 0;
8687 shadow.used.i = 0;
8688 shadow.used.fp = 0;
8689 shadow.used.mem = 0;
8690 shadow.defd = shadow.used;
8691 }
8692 }
8693 }
8694
8695 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8696 && GET_CODE (i) == INSN
8697 && GET_CODE (PATTERN (i)) != USE
8698 && GET_CODE (PATTERN (i)) != CLOBBER
8699 && get_attr_trap (i) == TRAP_YES)
8700 {
8701 if (optimize && !trap_pending)
8702 summarize_insn (PATTERN (i), &shadow, 0);
8703 trap_pending = 1;
8704 }
8705 }
8706 }
8707 \f
8708 /* Alpha can only issue instruction groups simultaneously if they are
8709 suitably aligned. This is very processor-specific. */
8710 /* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8711 that are marked "fake". These instructions do not exist on that target,
8712 but it is possible to see these insns with deranged combinations of
8713 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting,
8714 choose a result at random. */
8715
8716 enum alphaev4_pipe {
8717 EV4_STOP = 0,
8718 EV4_IB0 = 1,
8719 EV4_IB1 = 2,
8720 EV4_IBX = 4
8721 };
8722
8723 enum alphaev5_pipe {
8724 EV5_STOP = 0,
8725 EV5_NONE = 1,
8726 EV5_E01 = 2,
8727 EV5_E0 = 4,
8728 EV5_E1 = 8,
8729 EV5_FAM = 16,
8730 EV5_FA = 32,
8731 EV5_FM = 64
8732 };
8733
8734 static enum alphaev4_pipe
8735 alphaev4_insn_pipe (rtx insn)
8736 {
8737 if (recog_memoized (insn) < 0)
8738 return EV4_STOP;
8739 if (get_attr_length (insn) != 4)
8740 return EV4_STOP;
8741
8742 switch (get_attr_type (insn))
8743 {
8744 case TYPE_ILD:
8745 case TYPE_LDSYM:
8746 case TYPE_FLD:
8747 case TYPE_LD_L:
8748 return EV4_IBX;
8749
8750 case TYPE_IADD:
8751 case TYPE_ILOG:
8752 case TYPE_ICMOV:
8753 case TYPE_ICMP:
8754 case TYPE_FST:
8755 case TYPE_SHIFT:
8756 case TYPE_IMUL:
8757 case TYPE_FBR:
8758 case TYPE_MVI: /* fake */
8759 return EV4_IB0;
8760
8761 case TYPE_IST:
8762 case TYPE_MISC:
8763 case TYPE_IBR:
8764 case TYPE_JSR:
8765 case TYPE_CALLPAL:
8766 case TYPE_FCPYS:
8767 case TYPE_FCMOV:
8768 case TYPE_FADD:
8769 case TYPE_FDIV:
8770 case TYPE_FMUL:
8771 case TYPE_ST_C:
8772 case TYPE_MB:
8773 case TYPE_FSQRT: /* fake */
8774 case TYPE_FTOI: /* fake */
8775 case TYPE_ITOF: /* fake */
8776 return EV4_IB1;
8777
8778 default:
8779 gcc_unreachable ();
8780 }
8781 }
8782
8783 static enum alphaev5_pipe
8784 alphaev5_insn_pipe (rtx insn)
8785 {
8786 if (recog_memoized (insn) < 0)
8787 return EV5_STOP;
8788 if (get_attr_length (insn) != 4)
8789 return EV5_STOP;
8790
8791 switch (get_attr_type (insn))
8792 {
8793 case TYPE_ILD:
8794 case TYPE_FLD:
8795 case TYPE_LDSYM:
8796 case TYPE_IADD:
8797 case TYPE_ILOG:
8798 case TYPE_ICMOV:
8799 case TYPE_ICMP:
8800 return EV5_E01;
8801
8802 case TYPE_IST:
8803 case TYPE_FST:
8804 case TYPE_SHIFT:
8805 case TYPE_IMUL:
8806 case TYPE_MISC:
8807 case TYPE_MVI:
8808 case TYPE_LD_L:
8809 case TYPE_ST_C:
8810 case TYPE_MB:
8811 case TYPE_FTOI: /* fake */
8812 case TYPE_ITOF: /* fake */
8813 return EV5_E0;
8814
8815 case TYPE_IBR:
8816 case TYPE_JSR:
8817 case TYPE_CALLPAL:
8818 return EV5_E1;
8819
8820 case TYPE_FCPYS:
8821 return EV5_FAM;
8822
8823 case TYPE_FBR:
8824 case TYPE_FCMOV:
8825 case TYPE_FADD:
8826 case TYPE_FDIV:
8827 case TYPE_FSQRT: /* fake */
8828 return EV5_FA;
8829
8830 case TYPE_FMUL:
8831 return EV5_FM;
8832
8833 default:
8834 gcc_unreachable ();
8835 }
8836 }
8837
8838 /* IN_USE is a mask of the slots currently filled within the insn group.
8839 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
8840 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
8841
8842 LEN is, of course, the length of the group in bytes. */
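/* Illustrative slotting example (assumed, based on the pipe enums above):
   an IADD-class insn claims EV4_IB0 and a following IST-class insn claims
   EV4_IB1, filling the two-issue group; a second IB0-only insn could not
   be slotted and would start the next group instead.  */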
8843
8844 static rtx
8845 alphaev4_next_group (rtx insn, int *pin_use, int *plen)
8846 {
8847 int len, in_use;
8848
8849 len = in_use = 0;
8850
8851 if (! INSN_P (insn)
8852 || GET_CODE (PATTERN (insn)) == CLOBBER
8853 || GET_CODE (PATTERN (insn)) == USE)
8854 goto next_and_done;
8855
8856 while (1)
8857 {
8858 enum alphaev4_pipe pipe;
8859
8860 pipe = alphaev4_insn_pipe (insn);
8861 switch (pipe)
8862 {
8863 case EV4_STOP:
8864 /* Force complex instructions to start new groups. */
8865 if (in_use)
8866 goto done;
8867
8868 /* If this is a completely unrecognized insn, it's an asm.
8869 We don't know how long it is, so record length as -1 to
8870 signal a needed realignment. */
8871 if (recog_memoized (insn) < 0)
8872 len = -1;
8873 else
8874 len = get_attr_length (insn);
8875 goto next_and_done;
8876
8877 case EV4_IBX:
8878 if (in_use & EV4_IB0)
8879 {
8880 if (in_use & EV4_IB1)
8881 goto done;
8882 in_use |= EV4_IB1;
8883 }
8884 else
8885 in_use |= EV4_IB0 | EV4_IBX;
8886 break;
8887
8888 case EV4_IB0:
8889 if (in_use & EV4_IB0)
8890 {
8891 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
8892 goto done;
8893 in_use |= EV4_IB1;
8894 }
8895 in_use |= EV4_IB0;
8896 break;
8897
8898 case EV4_IB1:
8899 if (in_use & EV4_IB1)
8900 goto done;
8901 in_use |= EV4_IB1;
8902 break;
8903
8904 default:
8905 gcc_unreachable ();
8906 }
8907 len += 4;
8908
8909 /* Haifa doesn't do well scheduling branches. */
8910 if (GET_CODE (insn) == JUMP_INSN)
8911 goto next_and_done;
8912
8913 next:
8914 insn = next_nonnote_insn (insn);
8915
8916 if (!insn || ! INSN_P (insn))
8917 goto done;
8918
8919 /* Let Haifa tell us where it thinks insn group boundaries are. */
8920 if (GET_MODE (insn) == TImode)
8921 goto done;
8922
8923 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
8924 goto next;
8925 }
8926
8927 next_and_done:
8928 insn = next_nonnote_insn (insn);
8929
8930 done:
8931 *plen = len;
8932 *pin_use = in_use;
8933 return insn;
8934 }
8935
8936 /* IN_USE is a mask of the slots currently filled within the insn group.
8937 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
8938 the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
8939
8940 LEN is, of course, the length of the group in bytes. */
8941
8942 static rtx
8943 alphaev5_next_group (rtx insn, int *pin_use, int *plen)
8944 {
8945 int len, in_use;
8946
8947 len = in_use = 0;
8948
8949 if (! INSN_P (insn)
8950 || GET_CODE (PATTERN (insn)) == CLOBBER
8951 || GET_CODE (PATTERN (insn)) == USE)
8952 goto next_and_done;
8953
8954 while (1)
8955 {
8956 enum alphaev5_pipe pipe;
8957
8958 pipe = alphaev5_insn_pipe (insn);
8959 switch (pipe)
8960 {
8961 case EV5_STOP:
8962 /* Force complex instructions to start new groups. */
8963 if (in_use)
8964 goto done;
8965
8966 /* If this is a completely unrecognized insn, it's an asm.
8967 We don't know how long it is, so record length as -1 to
8968 signal a needed realignment. */
8969 if (recog_memoized (insn) < 0)
8970 len = -1;
8971 else
8972 len = get_attr_length (insn);
8973 goto next_and_done;
8974
8975 /* ??? In most of the cases below, we would like to assert that this
8976 never happens, as it would indicate an error either in Haifa or
8977 in the scheduling description. Unfortunately, Haifa never
8978 schedules the last instruction of the BB, so we don't have
8979 an accurate TI bit to go off of. */
8980 case EV5_E01:
8981 if (in_use & EV5_E0)
8982 {
8983 if (in_use & EV5_E1)
8984 goto done;
8985 in_use |= EV5_E1;
8986 }
8987 else
8988 in_use |= EV5_E0 | EV5_E01;
8989 break;
8990
8991 case EV5_E0:
8992 if (in_use & EV5_E0)
8993 {
8994 if (!(in_use & EV5_E01) || (in_use & EV5_E1))
8995 goto done;
8996 in_use |= EV5_E1;
8997 }
8998 in_use |= EV5_E0;
8999 break;
9000
9001 case EV5_E1:
9002 if (in_use & EV5_E1)
9003 goto done;
9004 in_use |= EV5_E1;
9005 break;
9006
9007 case EV5_FAM:
9008 if (in_use & EV5_FA)
9009 {
9010 if (in_use & EV5_FM)
9011 goto done;
9012 in_use |= EV5_FM;
9013 }
9014 else
9015 in_use |= EV5_FA | EV5_FAM;
9016 break;
9017
9018 case EV5_FA:
9019 if (in_use & EV5_FA)
9020 goto done;
9021 in_use |= EV5_FA;
9022 break;
9023
9024 case EV5_FM:
9025 if (in_use & EV5_FM)
9026 goto done;
9027 in_use |= EV5_FM;
9028 break;
9029
9030 case EV5_NONE:
9031 break;
9032
9033 default:
9034 gcc_unreachable ();
9035 }
9036 len += 4;
9037
9038 /* Haifa doesn't do well scheduling branches. */
9039 /* ??? If this is predicted not-taken, slotting continues, except
9040 that no more IBR, FBR, or JSR insns may be slotted. */
9041 if (GET_CODE (insn) == JUMP_INSN)
9042 goto next_and_done;
9043
9044 next:
9045 insn = next_nonnote_insn (insn);
9046
9047 if (!insn || ! INSN_P (insn))
9048 goto done;
9049
9050 /* Let Haifa tell us where it thinks insn group boundaries are. */
9051 if (GET_MODE (insn) == TImode)
9052 goto done;
9053
9054 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9055 goto next;
9056 }
9057
9058 next_and_done:
9059 insn = next_nonnote_insn (insn);
9060
9061 done:
9062 *plen = len;
9063 *pin_use = in_use;
9064 return insn;
9065 }
9066
9067 static rtx
9068 alphaev4_next_nop (int *pin_use)
9069 {
9070 int in_use = *pin_use;
9071 rtx nop;
9072
9073 if (!(in_use & EV4_IB0))
9074 {
9075 in_use |= EV4_IB0;
9076 nop = gen_nop ();
9077 }
9078 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9079 {
9080 in_use |= EV4_IB1;
9081 nop = gen_nop ();
9082 }
9083 else if (TARGET_FP && !(in_use & EV4_IB1))
9084 {
9085 in_use |= EV4_IB1;
9086 nop = gen_fnop ();
9087 }
9088 else
9089 nop = gen_unop ();
9090
9091 *pin_use = in_use;
9092 return nop;
9093 }
9094
9095 static rtx
9096 alphaev5_next_nop (int *pin_use)
9097 {
9098 int in_use = *pin_use;
9099 rtx nop;
9100
9101 if (!(in_use & EV5_E1))
9102 {
9103 in_use |= EV5_E1;
9104 nop = gen_nop ();
9105 }
9106 else if (TARGET_FP && !(in_use & EV5_FA))
9107 {
9108 in_use |= EV5_FA;
9109 nop = gen_fnop ();
9110 }
9111 else if (TARGET_FP && !(in_use & EV5_FM))
9112 {
9113 in_use |= EV5_FM;
9114 nop = gen_fnop ();
9115 }
9116 else
9117 nop = gen_unop ();
9118
9119 *pin_use = in_use;
9120 return nop;
9121 }
9122
9123 /* The instruction group alignment main loop. */
9124
9125 static void
9126 alpha_align_insns (unsigned int max_align,
9127 rtx (*next_group) (rtx, int *, int *),
9128 rtx (*next_nop) (int *))
9129 {
9130 /* ALIGN is the known alignment for the insn group. */
9131 unsigned int align;
9132 /* OFS is the offset of the current insn in the insn group. */
9133 int ofs;
9134 int prev_in_use, in_use, len, ldgp;
9135 rtx i, next;
9136
9137 /* Let shorten_branches take care of assigning alignments to code labels. */
9138 shorten_branches (get_insns ());
9139
9140 if (align_functions < 4)
9141 align = 4;
9142 else if ((unsigned int) align_functions < max_align)
9143 align = align_functions;
9144 else
9145 align = max_align;
9146
9147 ofs = prev_in_use = 0;
9148 i = get_insns ();
9149 if (GET_CODE (i) == NOTE)
9150 i = next_nonnote_insn (i);
9151
9152 ldgp = alpha_function_needs_gp ? 8 : 0;
9153
9154 while (i)
9155 {
9156 next = (*next_group) (i, &in_use, &len);
9157
9158 /* When we see a label, resync alignment etc. */
9159 if (GET_CODE (i) == CODE_LABEL)
9160 {
9161 unsigned int new_align = 1 << label_to_alignment (i);
9162
9163 if (new_align >= align)
9164 {
9165 align = new_align < max_align ? new_align : max_align;
9166 ofs = 0;
9167 }
9168
9169 else if (ofs & (new_align-1))
9170 ofs = (ofs | (new_align-1)) + 1;
9171 gcc_assert (!len);
9172 }
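/* Worked example for the resync above (illustrative numbers): with
   new_align = 8 and ofs = 5, (ofs | 7) + 1 rounds the offset up to 8,
   the next boundary of the label's alignment.  */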
9173
9174 /* Handle complex instructions specially. */
9175 else if (in_use == 0)
9176 {
9177 /* Asms will have length < 0. This is a signal that we have
9178 lost alignment knowledge. Assume, however, that the asm
9179 will not mis-align instructions. */
9180 if (len < 0)
9181 {
9182 ofs = 0;
9183 align = 4;
9184 len = 0;
9185 }
9186 }
9187
9188 /* If the known alignment is smaller than the recognized insn group,
9189 realign the output. */
9190 else if ((int) align < len)
9191 {
9192 unsigned int new_log_align = len > 8 ? 4 : 3;
9193 rtx prev, where;
9194
9195 where = prev = prev_nonnote_insn (i);
9196 if (!where || GET_CODE (where) != CODE_LABEL)
9197 where = i;
9198
9199 /* Can't realign between a call and its gp reload. */
9200 if (! (TARGET_EXPLICIT_RELOCS
9201 && prev && GET_CODE (prev) == CALL_INSN))
9202 {
9203 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9204 align = 1 << new_log_align;
9205 ofs = 0;
9206 }
9207 }
9208
9209 /* We may not insert padding inside the initial ldgp sequence. */
9210 else if (ldgp > 0)
9211 ldgp -= len;
9212
9213 /* If the group won't fit in the same INT16 as the previous,
9214 we need to add padding to keep the group together. Rather
9215 than simply leaving the insn filling to the assembler, we
9216 can make use of the knowledge of what sorts of instructions
9217 were issued in the previous group to make sure that all of
9218 the added nops are really free. */
9219 else if (ofs + len > (int) align)
9220 {
9221 int nop_count = (align - ofs) / 4;
9222 rtx where;
9223
9224 /* Insert nops before labels, branches, and calls to truly merge
9225 the execution of the nops with the previous instruction group. */
9226 where = prev_nonnote_insn (i);
9227 if (where)
9228 {
9229 if (GET_CODE (where) == CODE_LABEL)
9230 {
9231 rtx where2 = prev_nonnote_insn (where);
9232 if (where2 && GET_CODE (where2) == JUMP_INSN)
9233 where = where2;
9234 }
9235 else if (GET_CODE (where) == INSN)
9236 where = i;
9237 }
9238 else
9239 where = i;
9240
9241 do
9242 emit_insn_before ((*next_nop)(&prev_in_use), where);
9243 while (--nop_count);
9244 ofs = 0;
9245 }
9246
9247 ofs = (ofs + len) & (align - 1);
9248 prev_in_use = in_use;
9249 i = next;
9250 }
9251 }
9252 \f
9253 /* Machine dependent reorg pass. */
9254
9255 static void
9256 alpha_reorg (void)
9257 {
9258 if (alpha_tp != ALPHA_TP_PROG || flag_exceptions)
9259 alpha_handle_trap_shadows ();
9260
9261 /* Due to the number of extra trapb insns, don't bother fixing up
9262 alignment when trap precision is instruction. Moreover, we can
9263 only do our job when sched2 is run. */
9264 if (optimize && !optimize_size
9265 && alpha_tp != ALPHA_TP_INSN
9266 && flag_schedule_insns_after_reload)
9267 {
9268 if (alpha_tune == PROCESSOR_EV4)
9269 alpha_align_insns (8, alphaev4_next_group, alphaev4_next_nop);
9270 else if (alpha_tune == PROCESSOR_EV5)
9271 alpha_align_insns (16, alphaev5_next_group, alphaev5_next_nop);
9272 }
9273 }
9274 \f
9275 #if !TARGET_ABI_UNICOSMK
9276
9277 #ifdef HAVE_STAMP_H
9278 #include <stamp.h>
9279 #endif
9280
9281 static void
9282 alpha_file_start (void)
9283 {
9284 #ifdef OBJECT_FORMAT_ELF
9285 /* If emitting dwarf2 debug information, we cannot generate a .file
9286 directive to start the file, as it will conflict with dwarf2out
9287 file numbers. So it's only useful when emitting mdebug output. */
9288 targetm.file_start_file_directive = (write_symbols == DBX_DEBUG);
9289 #endif
9290
9291 default_file_start ();
9292 #ifdef MS_STAMP
9293 fprintf (asm_out_file, "\t.verstamp %d %d\n", MS_STAMP, LS_STAMP);
9294 #endif
9295
9296 fputs ("\t.set noreorder\n", asm_out_file);
9297 fputs ("\t.set volatile\n", asm_out_file);
9298 if (!TARGET_ABI_OPEN_VMS)
9299 fputs ("\t.set noat\n", asm_out_file);
9300 if (TARGET_EXPLICIT_RELOCS)
9301 fputs ("\t.set nomacro\n", asm_out_file);
9302 if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9303 {
9304 const char *arch;
9305
9306 if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9307 arch = "ev6";
9308 else if (TARGET_MAX)
9309 arch = "pca56";
9310 else if (TARGET_BWX)
9311 arch = "ev56";
9312 else if (alpha_cpu == PROCESSOR_EV5)
9313 arch = "ev5";
9314 else
9315 arch = "ev4";
9316
9317 fprintf (asm_out_file, "\t.arch %s\n", arch);
9318 }
9319 }
9320 #endif
9321
9322 #ifdef OBJECT_FORMAT_ELF
9323 /* Since we don't have a .dynbss section, we should not allow global
9324 relocations in the .rodata section. */
9325
9326 static int
9327 alpha_elf_reloc_rw_mask (void)
9328 {
9329 return flag_pic ? 3 : 2;
9330 }
9331
9332 /* Return a section for X. The only special thing we do here is to
9333 honor small data. */
9334
9335 static section *
9336 alpha_elf_select_rtx_section (enum machine_mode mode, rtx x,
9337 unsigned HOST_WIDE_INT align)
9338 {
9339 if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9340 /* ??? Consider using mergeable sdata sections. */
9341 return sdata_section;
9342 else
9343 return default_elf_select_rtx_section (mode, x, align);
9344 }
9345
9346 #endif /* OBJECT_FORMAT_ELF */
9347 \f
9348 /* Structure to collect function names for final output in link section. */
9349 /* Note that items marked with GTY can't be ifdef'ed out. */
9350
9351 enum links_kind {KIND_UNUSED, KIND_LOCAL, KIND_EXTERN};
9352 enum reloc_kind {KIND_LINKAGE, KIND_CODEADDR};
9353
9354 struct alpha_links GTY(())
9355 {
9356 int num;
9357 rtx linkage;
9358 enum links_kind lkind;
9359 enum reloc_kind rkind;
9360 };
9361
9362 struct alpha_funcs GTY(())
9363 {
9364 int num;
9365 splay_tree GTY ((param1_is (char *), param2_is (struct alpha_links *)))
9366 links;
9367 };
9368
9369 static GTY ((param1_is (char *), param2_is (struct alpha_links *)))
9370 splay_tree alpha_links_tree;
9371 static GTY ((param1_is (tree), param2_is (struct alpha_funcs *)))
9372 splay_tree alpha_funcs_tree;
9373
9374 static GTY(()) int alpha_funcs_num;
9375
9376 #if TARGET_ABI_OPEN_VMS
9377
9378 /* Return the VMS argument type corresponding to MODE. */
9379
9380 enum avms_arg_type
9381 alpha_arg_type (enum machine_mode mode)
9382 {
9383 switch (mode)
9384 {
9385 case SFmode:
9386 return TARGET_FLOAT_VAX ? FF : FS;
9387 case DFmode:
9388 return TARGET_FLOAT_VAX ? FD : FT;
9389 default:
9390 return I64;
9391 }
9392 }
9393
9394 /* Return an rtx for an integer representing the VMS Argument Information
9395 register value. */
9396
9397 rtx
9398 alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9399 {
9400 unsigned HOST_WIDE_INT regval = cum.num_args;
9401 int i;
9402
9403 for (i = 0; i < 6; i++)
9404 regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9405
9406 return GEN_INT (regval);
9407 }
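/* For illustration (derived from the loop above, values assumed): with
   cum.num_args = 2 the low byte of the AI value is 2, and each argument's
   3-bit VMS type code is placed at bit position i*3 + 8, so the codes for
   the first two register arguments occupy bits 8..10 and 11..13.  */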
9408 \f
9409 /* Make (or fake) a .linkage entry for a function call.
9410 
9411 IS_LOCAL is 0 if the name is used in a call, 1 if it is used in a definition.
9412 
9413 Return a SYMBOL_REF rtx for the linkage. */
9414
9415 rtx
9416 alpha_need_linkage (const char *name, int is_local)
9417 {
9418 splay_tree_node node;
9419 struct alpha_links *al;
9420
9421 if (name[0] == '*')
9422 name++;
9423
9424 if (is_local)
9425 {
9426 struct alpha_funcs *cfaf;
9427
9428 if (!alpha_funcs_tree)
9429 alpha_funcs_tree = splay_tree_new_ggc ((splay_tree_compare_fn)
9430 splay_tree_compare_pointers);
9431
9432 cfaf = (struct alpha_funcs *) ggc_alloc (sizeof (struct alpha_funcs));
9433
9434 cfaf->links = 0;
9435 cfaf->num = ++alpha_funcs_num;
9436
9437 splay_tree_insert (alpha_funcs_tree,
9438 (splay_tree_key) current_function_decl,
9439 (splay_tree_value) cfaf);
9440 }
9441
9442 if (alpha_links_tree)
9443 {
9444 /* Is this name already defined? */
9445
9446 node = splay_tree_lookup (alpha_links_tree, (splay_tree_key) name);
9447 if (node)
9448 {
9449 al = (struct alpha_links *) node->value;
9450 if (is_local)
9451 {
9452 /* Defined here but external assumed. */
9453 if (al->lkind == KIND_EXTERN)
9454 al->lkind = KIND_LOCAL;
9455 }
9456 else
9457 {
9458 /* Used here but unused assumed. */
9459 if (al->lkind == KIND_UNUSED)
9460 al->lkind = KIND_LOCAL;
9461 }
9462 return al->linkage;
9463 }
9464 }
9465 else
9466 alpha_links_tree = splay_tree_new_ggc ((splay_tree_compare_fn) strcmp);
9467
9468 al = (struct alpha_links *) ggc_alloc (sizeof (struct alpha_links));
9469 name = ggc_strdup (name);
9470
9471 /* Assume external if no definition. */
9472 al->lkind = (is_local ? KIND_UNUSED : KIND_EXTERN);
9473
9474 /* Ensure we have an IDENTIFIER so assemble_name can mark it used. */
9475 get_identifier (name);
9476
9477 /* Construct a SYMBOL_REF for us to call. */
9478 {
9479 size_t name_len = strlen (name);
9480 char *linksym = alloca (name_len + 6);
9481 linksym[0] = '$';
9482 memcpy (linksym + 1, name, name_len);
9483 memcpy (linksym + 1 + name_len, "..lk", 5);
9484 al->linkage = gen_rtx_SYMBOL_REF (Pmode,
9485 ggc_alloc_string (linksym, name_len + 5));
9486 }
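/* For example (illustrative name), a call to a routine "foo" gets the
   linkage symbol "$foo..lk" from the code above; alpha_use_linkage later
   builds per-function variants of the form "$<num>..foo..lk".  */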
9487
9488 splay_tree_insert (alpha_links_tree, (splay_tree_key) name,
9489 (splay_tree_value) al);
9490
9491 return al->linkage;
9492 }
9493
9494 rtx
9495 alpha_use_linkage (rtx linkage, tree cfundecl, int lflag, int rflag)
9496 {
9497 splay_tree_node cfunnode;
9498 struct alpha_funcs *cfaf;
9499 struct alpha_links *al;
9500 const char *name = XSTR (linkage, 0);
9501
9502 cfaf = (struct alpha_funcs *) 0;
9503 al = (struct alpha_links *) 0;
9504
9505 cfunnode = splay_tree_lookup (alpha_funcs_tree, (splay_tree_key) cfundecl);
9506 cfaf = (struct alpha_funcs *) cfunnode->value;
9507
9508 if (cfaf->links)
9509 {
9510 splay_tree_node lnode;
9511
9512 /* Is this name already defined? */
9513
9514 lnode = splay_tree_lookup (cfaf->links, (splay_tree_key) name);
9515 if (lnode)
9516 al = (struct alpha_links *) lnode->value;
9517 }
9518 else
9519 cfaf->links = splay_tree_new_ggc ((splay_tree_compare_fn) strcmp);
9520
9521 if (!al)
9522 {
9523 size_t name_len;
9524 size_t buflen;
9525 char buf [512];
9526 char *linksym;
9527 splay_tree_node node = 0;
9528 struct alpha_links *anl;
9529
9530 if (name[0] == '*')
9531 name++;
9532
9533 name_len = strlen (name);
9534
9535 al = (struct alpha_links *) ggc_alloc (sizeof (struct alpha_links));
9536 al->num = cfaf->num;
9537
9538 node = splay_tree_lookup (alpha_links_tree, (splay_tree_key) name);
9539 if (node)
9540 {
9541 anl = (struct alpha_links *) node->value;
9542 al->lkind = anl->lkind;
9543 }
9544
9545 sprintf (buf, "$%d..%s..lk", cfaf->num, name);
9546 buflen = strlen (buf);
9547 linksym = alloca (buflen + 1);
9548 memcpy (linksym, buf, buflen + 1);
9549
9550 al->linkage = gen_rtx_SYMBOL_REF
9551 (Pmode, ggc_alloc_string (linksym, buflen + 1));
9552
9553 splay_tree_insert (cfaf->links, (splay_tree_key) name,
9554 (splay_tree_value) al);
9555 }
9556
9557 if (rflag)
9558 al->rkind = KIND_CODEADDR;
9559 else
9560 al->rkind = KIND_LINKAGE;
9561
9562 if (lflag)
9563 return gen_rtx_MEM (Pmode, plus_constant (al->linkage, 8));
9564 else
9565 return al->linkage;
9566 }
9567
9568 static int
9569 alpha_write_one_linkage (splay_tree_node node, void *data)
9570 {
9571 const char *const name = (const char *) node->key;
9572 struct alpha_links *link = (struct alpha_links *) node->value;
9573 FILE *stream = (FILE *) data;
9574
9575 fprintf (stream, "$%d..%s..lk:\n", link->num, name);
9576 if (link->rkind == KIND_CODEADDR)
9577 {
9578 if (link->lkind == KIND_LOCAL)
9579 {
9580 /* Local and used */
9581 fprintf (stream, "\t.quad %s..en\n", name);
9582 }
9583 else
9584 {
9585 /* External and used, request code address. */
9586 fprintf (stream, "\t.code_address %s\n", name);
9587 }
9588 }
9589 else
9590 {
9591 if (link->lkind == KIND_LOCAL)
9592 {
9593 /* Local and used, build linkage pair. */
9594 fprintf (stream, "\t.quad %s..en\n", name);
9595 fprintf (stream, "\t.quad %s\n", name);
9596 }
9597 else
9598 {
9599 /* External and used, request linkage pair. */
9600 fprintf (stream, "\t.linkage %s\n", name);
9601 }
9602 }
9603
9604 return 0;
9605 }
9606
9607 static void
9608 alpha_write_linkage (FILE *stream, const char *funname, tree fundecl)
9609 {
9610 splay_tree_node node;
9611 struct alpha_funcs *func;
9612
9613 fprintf (stream, "\t.link\n");
9614 fprintf (stream, "\t.align 3\n");
9615 in_section = NULL;
9616
9617 node = splay_tree_lookup (alpha_funcs_tree, (splay_tree_key) fundecl);
9618 func = (struct alpha_funcs *) node->value;
9619
9620 fputs ("\t.name ", stream);
9621 assemble_name (stream, funname);
9622 fputs ("..na\n", stream);
9623 ASM_OUTPUT_LABEL (stream, funname);
9624 fprintf (stream, "\t.pdesc ");
9625 assemble_name (stream, funname);
9626 fprintf (stream, "..en,%s\n",
9627 alpha_procedure_type == PT_STACK ? "stack"
9628 : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9629
9630 if (func->links)
9631 {
9632 splay_tree_foreach (func->links, alpha_write_one_linkage, stream);
9633 /* splay_tree_delete (func->links); */
9634 }
9635 }
9636
9637 /* Given a decl, a section name, and whether the decl initializer
9638 has relocs, choose attributes for the section. */
9639
9640 #define SECTION_VMS_OVERLAY SECTION_FORGET
9641 #define SECTION_VMS_GLOBAL SECTION_MACH_DEP
9642 #define SECTION_VMS_INITIALIZE (SECTION_VMS_GLOBAL << 1)
9643
9644 static unsigned int
9645 vms_section_type_flags (tree decl, const char *name, int reloc)
9646 {
9647 unsigned int flags = default_section_type_flags (decl, name, reloc);
9648
9649 if (decl && DECL_ATTRIBUTES (decl)
9650 && lookup_attribute ("overlaid", DECL_ATTRIBUTES (decl)))
9651 flags |= SECTION_VMS_OVERLAY;
9652 if (decl && DECL_ATTRIBUTES (decl)
9653 && lookup_attribute ("global", DECL_ATTRIBUTES (decl)))
9654 flags |= SECTION_VMS_GLOBAL;
9655 if (decl && DECL_ATTRIBUTES (decl)
9656 && lookup_attribute ("initialize", DECL_ATTRIBUTES (decl)))
9657 flags |= SECTION_VMS_INITIALIZE;
9658
9659 return flags;
9660 }
9661
9662 /* Switch to an arbitrary section NAME with attributes as specified
9663 by FLAGS. ALIGN specifies any known alignment requirements for
9664 the section; 0 if the default should be used. */
9665
9666 static void
9667 vms_asm_named_section (const char *name, unsigned int flags,
9668 tree decl ATTRIBUTE_UNUSED)
9669 {
9670 fputc ('\n', asm_out_file);
9671 fprintf (asm_out_file, ".section\t%s", name);
9672
9673 if (flags & SECTION_VMS_OVERLAY)
9674 fprintf (asm_out_file, ",OVR");
9675 if (flags & SECTION_VMS_GLOBAL)
9676 fprintf (asm_out_file, ",GBL");
9677 if (flags & SECTION_VMS_INITIALIZE)
9678 fprintf (asm_out_file, ",NOMOD");
9679 if (flags & SECTION_DEBUG)
9680 fprintf (asm_out_file, ",NOWRT");
9681
9682 fputc ('\n', asm_out_file);
9683 }
9684
9685 /* Record an element in the table of global constructors. SYMBOL is
9686 a SYMBOL_REF of the function to be called; PRIORITY is a number
9687 between 0 and MAX_INIT_PRIORITY.
9688
9689 Differs from default_ctors_section_asm_out_constructor in that the
9690 width of the .ctors entry is always 64 bits, rather than the 32 bits
9691 used by a normal pointer. */
9692
9693 static void
9694 vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9695 {
9696 switch_to_section (ctors_section);
9697 assemble_align (BITS_PER_WORD);
9698 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9699 }
9700
9701 static void
9702 vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9703 {
9704 switch_to_section (dtors_section);
9705 assemble_align (BITS_PER_WORD);
9706 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9707 }
9708 #else
9709
9710 rtx
9711 alpha_need_linkage (const char *name ATTRIBUTE_UNUSED,
9712 int is_local ATTRIBUTE_UNUSED)
9713 {
9714 return NULL_RTX;
9715 }
9716
9717 rtx
9718 alpha_use_linkage (rtx linkage ATTRIBUTE_UNUSED,
9719 tree cfundecl ATTRIBUTE_UNUSED,
9720 int lflag ATTRIBUTE_UNUSED,
9721 int rflag ATTRIBUTE_UNUSED)
9722 {
9723 return NULL_RTX;
9724 }
9725
9726 #endif /* TARGET_ABI_OPEN_VMS */
9727 \f
9728 #if TARGET_ABI_UNICOSMK
9729
9730 /* This evaluates to true if we do not know how to pass TYPE solely in
9731 registers. This is the case for all arguments that do not fit in two
9732 registers. */
9733
9734 static bool
9735 unicosmk_must_pass_in_stack (enum machine_mode mode, tree type)
9736 {
9737 if (type == NULL)
9738 return false;
9739
9740 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9741 return true;
9742 if (TREE_ADDRESSABLE (type))
9743 return true;
9744
9745 return ALPHA_ARG_SIZE (mode, type, 0) > 2;
9746 }
9747
9748 /* Define the offset between two registers, one to be eliminated, and the
9749 other its replacement, at the start of a routine. */
9750
9751 int
9752 unicosmk_initial_elimination_offset (int from, int to)
9753 {
9754 int fixed_size;
9755
9756 fixed_size = alpha_sa_size();
9757 if (fixed_size != 0)
9758 fixed_size += 48;
9759
9760 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9761 return -fixed_size;
9762 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9763 return 0;
9764 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9765 return (ALPHA_ROUND (current_function_outgoing_args_size)
9766 + ALPHA_ROUND (get_frame_size()));
9767 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9768 return (ALPHA_ROUND (fixed_size)
9769 + ALPHA_ROUND (get_frame_size()
9770 + current_function_outgoing_args_size));
9771 else
9772 gcc_unreachable ();
9773 }
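
/* A worked example, assuming every quantity below is already aligned so
that ALPHA_ROUND leaves it unchanged: with 16 bytes of register saves
(fixed_size = 16 + 48 = 64), a 32-byte local frame and 48 bytes of
outgoing arguments, eliminating the arg pointer to the stack pointer
gives 64 + (32 + 48) = 144, and eliminating the frame pointer to the
stack pointer gives 48 + 32 = 80. */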
9774
9775 /* Output the module name for .ident and .end directives. We have to strip
9776 directories and make sure that the module name starts with a letter
9777 or '$'. */
9778
9779 static void
9780 unicosmk_output_module_name (FILE *file)
9781 {
9782 const char *name = lbasename (main_input_filename);
9783 unsigned len = strlen (name);
9784 char *clean_name = alloca (len + 2);
9785 char *ptr = clean_name;
9786
9787 /* CAM only accepts module names that start with a letter or '$'. We
9788 prefix the module name with a '$' if necessary. */
9789
9790 if (!ISALPHA (*name))
9791 *ptr++ = '$';
9792 memcpy (ptr, name, len + 1);
9793 clean_symbol_name (clean_name);
9794 fputs (clean_name, file);
9795 }
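
/* For example, a main input file of "42model.c" is emitted as
"$42model_c", assuming clean_symbol_name maps the '.' to an underscore;
the leading '$' is added only because the basename does not start with
a letter. */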
9796
9797 /* Output the definition of a common variable. */
9798
9799 void
9800 unicosmk_output_common (FILE *file, const char *name, int size, int align)
9801 {
9802 tree name_tree;
9803 printf ("T3E__: common %s\n", name);
9804
9805 in_section = NULL;
9806 fputs ("\t.endp\n\n\t.psect ", file);
9807 assemble_name (file, name);
9808 fprintf (file, ",%d,common\n", floor_log2 (align / BITS_PER_UNIT));
9809 fprintf (file, "\t.byte\t0:%d\n", size);
9810
9811 /* Mark the symbol as defined in this module. */
9812 name_tree = get_identifier (name);
9813 TREE_ASM_WRITTEN (name_tree) = 1;
9814 }
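
/* For a hypothetical 16-byte common variable "foo" with 64-bit (8-byte)
alignment, the function above emits (roughly)
	.psect foo,3,common
	.byte	0:16
after closing the previous psect with .endp; floor_log2 (64 / 8) gives
the alignment field of 3. */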
9815
9816 #define SECTION_PUBLIC SECTION_MACH_DEP
9817 #define SECTION_MAIN (SECTION_PUBLIC << 1)
9818 static int current_section_align;
9819
9820 /* A get_unnamed_section callback for switching to the text section. */
9821
9822 static void
9823 unicosmk_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9824 {
9825 static int count = 0;
9826 fprintf (asm_out_file, "\t.endp\n\n\t.psect\tgcc@text___%d,code\n", count++);
9827 }
9828
9829 /* A get_unnamed_section callback for switching to the data section. */
9830
9831 static void
9832 unicosmk_output_data_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9833 {
9834 static int count = 1;
9835 fprintf (asm_out_file, "\t.endp\n\n\t.psect\tgcc@data___%d,data\n", count++);
9836 }
9837
9838 /* Implement TARGET_ASM_INIT_SECTIONS.
9839
9840 The Cray assembler is really weird with respect to sections. It has only
9841 named sections and you can't reopen a section once it has been closed.
9842 This means that we have to generate unique names whenever we want to
9843 reenter the text or the data section. */
9844
9845 static void
9846 unicosmk_init_sections (void)
9847 {
9848 text_section = get_unnamed_section (SECTION_CODE,
9849 unicosmk_output_text_section_asm_op,
9850 NULL);
9851 data_section = get_unnamed_section (SECTION_WRITE,
9852 unicosmk_output_data_section_asm_op,
9853 NULL);
9854 readonly_data_section = data_section;
9855 }
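
/* Each switch back into the text or data section therefore opens a fresh
psect: successive returns to the text section emit gcc@text___0,
gcc@text___1, and so on, as generated by the callbacks above. */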
9856
9857 static unsigned int
9858 unicosmk_section_type_flags (tree decl, const char *name,
9859 int reloc ATTRIBUTE_UNUSED)
9860 {
9861 unsigned int flags = default_section_type_flags (decl, name, reloc);
9862
9863 if (!decl)
9864 return flags;
9865
9866 if (TREE_CODE (decl) == FUNCTION_DECL)
9867 {
9868 current_section_align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9869 if (align_functions_log > current_section_align)
9870 current_section_align = align_functions_log;
9871
9872 if (! strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), "main"))
9873 flags |= SECTION_MAIN;
9874 }
9875 else
9876 current_section_align = floor_log2 (DECL_ALIGN (decl) / BITS_PER_UNIT);
9877
9878 if (TREE_PUBLIC (decl))
9879 flags |= SECTION_PUBLIC;
9880
9881 return flags;
9882 }
9883
9884 /* Generate a section name for decl and associate it with the
9885 declaration. */
9886
9887 static void
9888 unicosmk_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
9889 {
9890 const char *name;
9891 int len;
9892
9893 gcc_assert (decl);
9894
9895 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9896 name = default_strip_name_encoding (name);
9897 len = strlen (name);
9898
9899 if (TREE_CODE (decl) == FUNCTION_DECL)
9900 {
9901 char *string;
9902
9903 /* It is essential that we prefix the section name here because
9904 otherwise the section names generated for constructors and
9905 destructors confuse collect2. */
9906
9907 string = alloca (len + 6);
9908 sprintf (string, "code@%s", name);
9909 DECL_SECTION_NAME (decl) = build_string (len + 5, string);
9910 }
9911 else if (TREE_PUBLIC (decl))
9912 DECL_SECTION_NAME (decl) = build_string (len, name);
9913 else
9914 {
9915 char *string;
9916
9917 string = alloca (len + 6);
9918 sprintf (string, "data@%s", name);
9919 DECL_SECTION_NAME (decl) = build_string (len + 5, string);
9920 }
9921 }
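
/* For illustration (hypothetical declarations): a function "foo" is
placed in section "code@foo", a public variable "bar" keeps the plain
section name "bar", and a non-public variable "baz" gets "data@baz". */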
9922
9923 /* Switch to an arbitrary section NAME with attributes as specified
9924 by FLAGS. ALIGN specifies any known alignment requirements for
9925 the section; 0 if the default should be used. */
9926
9927 static void
9928 unicosmk_asm_named_section (const char *name, unsigned int flags,
9929 tree decl ATTRIBUTE_UNUSED)
9930 {
9931 const char *kind;
9932
9933 /* Close the previous section. */
9934
9935 fputs ("\t.endp\n\n", asm_out_file);
9936
9937 /* Find out what kind of section we are opening. */
9938
9939 if (flags & SECTION_MAIN)
9940 fputs ("\t.start\tmain\n", asm_out_file);
9941
9942 if (flags & SECTION_CODE)
9943 kind = "code";
9944 else if (flags & SECTION_PUBLIC)
9945 kind = "common";
9946 else
9947 kind = "data";
9948
9949 if (current_section_align != 0)
9950 fprintf (asm_out_file, "\t.psect\t%s,%d,%s\n", name,
9951 current_section_align, kind);
9952 else
9953 fprintf (asm_out_file, "\t.psect\t%s,%s\n", name, kind);
9954 }
9955
9956 static void
9957 unicosmk_insert_attributes (tree decl, tree *attr_ptr ATTRIBUTE_UNUSED)
9958 {
9959 if (DECL_P (decl)
9960 && (TREE_PUBLIC (decl) || TREE_CODE (decl) == FUNCTION_DECL))
9961 unicosmk_unique_section (decl, 0);
9962 }
9963
9964 /* Output an alignment directive. We have to use the macro 'gcc@code@align'
9965 in code sections because .align fills unused space with zeroes. */
9966
9967 void
9968 unicosmk_output_align (FILE *file, int align)
9969 {
9970 if (inside_function)
9971 fprintf (file, "\tgcc@code@align\t%d\n", align);
9972 else
9973 fprintf (file, "\t.align\t%d\n", align);
9974 }
9975
9976 /* Add a case vector to the current function's list of deferred case
9977 vectors. Case vectors have to be put into a separate section because CAM
9978 does not allow data definitions in code sections. */
9979
9980 void
9981 unicosmk_defer_case_vector (rtx lab, rtx vec)
9982 {
9983 struct machine_function *machine = cfun->machine;
9984
9985 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9986 machine->addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec,
9987 machine->addr_list);
9988 }
9989
9990 /* Output a case vector. */
9991
9992 static void
9993 unicosmk_output_addr_vec (FILE *file, rtx vec)
9994 {
9995 rtx lab = XEXP (vec, 0);
9996 rtx body = XEXP (vec, 1);
9997 int vlen = XVECLEN (body, 0);
9998 int idx;
9999
10000 (*targetm.asm_out.internal_label) (file, "L", CODE_LABEL_NUMBER (lab));
10001
10002 for (idx = 0; idx < vlen; idx++)
10003 {
10004 ASM_OUTPUT_ADDR_VEC_ELT
10005 (file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10006 }
10007 }
10008
10009 /* Output current function's deferred case vectors. */
10010
10011 static void
10012 unicosmk_output_deferred_case_vectors (FILE *file)
10013 {
10014 struct machine_function *machine = cfun->machine;
10015 rtx t;
10016
10017 if (machine->addr_list == NULL_RTX)
10018 return;
10019
10020 switch_to_section (data_section);
10021 for (t = machine->addr_list; t; t = XEXP (t, 1))
10022 unicosmk_output_addr_vec (file, XEXP (t, 0));
10023 }
10024
10025 /* Generate the name of the SSIB section for the current function. */
10026
10027 #define SSIB_PREFIX "__SSIB_"
10028 #define SSIB_PREFIX_LEN 7
10029
10030 static const char *
10031 unicosmk_ssib_name (void)
10032 {
10033 /* A 256-byte buffer is OK since CAM won't be able to deal with names
10034 longer than that anyway. */
10035
10036 static char name[256];
10037
10038 rtx x;
10039 const char *fnname;
10040 int len;
10041
10042 x = DECL_RTL (cfun->decl);
10043 gcc_assert (GET_CODE (x) == MEM);
10044 x = XEXP (x, 0);
10045 gcc_assert (GET_CODE (x) == SYMBOL_REF);
10046 fnname = XSTR (x, 0);
10047
10048 len = strlen (fnname);
10049 if (len + SSIB_PREFIX_LEN > 255)
10050 len = 255 - SSIB_PREFIX_LEN;
10051
10052 strcpy (name, SSIB_PREFIX);
10053 strncpy (name + SSIB_PREFIX_LEN, fnname, len);
10054 name[len + SSIB_PREFIX_LEN] = 0;
10055
10056 return name;
10057 }
10058
10059 /* Set up the dynamic subprogram information block (DSIB) and update the
10060 frame pointer register ($15) for subroutines which have a frame. If the
10061 subroutine doesn't have a frame, simply increment $15. */
10062
10063 static void
10064 unicosmk_gen_dsib (unsigned long *imaskP)
10065 {
10066 if (alpha_procedure_type == PT_STACK)
10067 {
10068 const char *ssib_name;
10069 rtx mem;
10070
10071 /* Allocate 64 bytes for the DSIB. */
10072
10073 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
10074 GEN_INT (-64))));
10075 emit_insn (gen_blockage ());
10076
10077 /* Save the return address. */
10078
10079 mem = gen_rtx_MEM (DImode, plus_constant (stack_pointer_rtx, 56));
10080 set_mem_alias_set (mem, alpha_sr_alias_set);
10081 FRP (emit_move_insn (mem, gen_rtx_REG (DImode, REG_RA)));
10082 (*imaskP) &= ~(1UL << REG_RA);
10083
10084 /* Save the old frame pointer. */
10085
10086 mem = gen_rtx_MEM (DImode, plus_constant (stack_pointer_rtx, 48));
10087 set_mem_alias_set (mem, alpha_sr_alias_set);
10088 FRP (emit_move_insn (mem, hard_frame_pointer_rtx));
10089 (*imaskP) &= ~(1UL << HARD_FRAME_POINTER_REGNUM);
10090
10091 emit_insn (gen_blockage ());
10092
10093 /* Store the SSIB pointer. */
10094
10095 ssib_name = ggc_strdup (unicosmk_ssib_name ());
10096 mem = gen_rtx_MEM (DImode, plus_constant (stack_pointer_rtx, 32));
10097 set_mem_alias_set (mem, alpha_sr_alias_set);
10098
10099 FRP (emit_move_insn (gen_rtx_REG (DImode, 5),
10100 gen_rtx_SYMBOL_REF (Pmode, ssib_name)));
10101 FRP (emit_move_insn (mem, gen_rtx_REG (DImode, 5)));
10102
10103 /* Save the CIW index. */
10104
10105 mem = gen_rtx_MEM (DImode, plus_constant (stack_pointer_rtx, 24));
10106 set_mem_alias_set (mem, alpha_sr_alias_set);
10107 FRP (emit_move_insn (mem, gen_rtx_REG (DImode, 25)));
10108
10109 emit_insn (gen_blockage ());
10110
10111 /* Set the new frame pointer. */
10112
10113 FRP (emit_insn (gen_adddi3 (hard_frame_pointer_rtx,
10114 stack_pointer_rtx, GEN_INT (64))));
10115
10116 }
10117 else
10118 {
10119 /* Increment the frame pointer register to indicate that we do not
10120 have a frame. */
10121
10122 FRP (emit_insn (gen_adddi3 (hard_frame_pointer_rtx,
10123 hard_frame_pointer_rtx, const1_rtx)));
10124 }
10125 }
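
/* Sketch of the 64-byte DSIB built above, as byte offsets from the
decremented stack pointer: 56 return address, 48 previous frame pointer,
32 SSIB pointer, 24 CIW index. The new frame pointer then points just
past the block, at $sp + 64. */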
10126
10127 /* Output the static subroutine information block for the current
10128 function. */
10129
10130 static void
10131 unicosmk_output_ssib (FILE *file, const char *fnname)
10132 {
10133 int len;
10134 int i;
10135 rtx x;
10136 rtx ciw;
10137 struct machine_function *machine = cfun->machine;
10138
10139 in_section = NULL;
10140 fprintf (file, "\t.endp\n\n\t.psect\t%s%s,data\n", user_label_prefix,
10141 unicosmk_ssib_name ());
10142
10143 /* Some required stuff and the function name length. */
10144
10145 len = strlen (fnname);
10146 fprintf (file, "\t.quad\t^X20008%2.2X28\n", len);
10147
10148 /* Saved registers
10149 ??? We don't do that yet. */
10150
10151 fputs ("\t.quad\t0\n", file);
10152
10153 /* Function address. */
10154
10155 fputs ("\t.quad\t", file);
10156 assemble_name (file, fnname);
10157 putc ('\n', file);
10158
10159 fputs ("\t.quad\t0\n", file);
10160 fputs ("\t.quad\t0\n", file);
10161
10162 /* Function name.
10163 ??? We do it the same way Cray CC does it but this could be
10164 simplified. */
10165
10166 for (i = 0; i < len; i++)
10167 fprintf (file, "\t.byte\t%d\n", (int)(fnname[i]));
10168 if ((len % 8) == 0)
10169 fputs ("\t.quad\t0\n", file);
10170 else
10171 fprintf (file, "\t.bits\t%d : 0\n", (8 - (len % 8))*8);
10172
10173 /* All call information words used in the function. */
10174
10175 for (x = machine->first_ciw; x; x = XEXP (x, 1))
10176 {
10177 ciw = XEXP (x, 0);
10178 #if HOST_BITS_PER_WIDE_INT == 32
10179 fprintf (file, "\t.quad\t" HOST_WIDE_INT_PRINT_DOUBLE_HEX "\n",
10180 CONST_DOUBLE_HIGH (ciw), CONST_DOUBLE_LOW (ciw));
10181 #else
10182 fprintf (file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n", INTVAL (ciw));
10183 #endif
10184 }
10185 }
10186
10187 /* Add a call information word (CIW) to the list of the current function's
10188 CIWs and return its index.
10189
10190 X is a CONST_INT or CONST_DOUBLE representing the CIW. */
10191
10192 rtx
10193 unicosmk_add_call_info_word (rtx x)
10194 {
10195 rtx node;
10196 struct machine_function *machine = cfun->machine;
10197
10198 node = gen_rtx_EXPR_LIST (VOIDmode, x, NULL_RTX);
10199 if (machine->first_ciw == NULL_RTX)
10200 machine->first_ciw = node;
10201 else
10202 XEXP (machine->last_ciw, 1) = node;
10203
10204 machine->last_ciw = node;
10205 ++machine->ciw_count;
10206
10207 return GEN_INT (machine->ciw_count
10208 + strlen (current_function_name ())/8 + 5);
10209 }
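
/* A worked example for a hypothetical function named "main":
strlen ("main") / 8 is 0, so the first CIW added gets index
1 + 0 + 5 = 6, the second gets 7, and so on. */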
10210
10211 /* The Cray assembler doesn't accept extern declarations for symbols which
10212 are defined in the same file. We have to keep track of all global
10213 symbols which are referenced and/or defined in a source file and output
10214 extern declarations, at the end of the file, for those which are
10215 referenced but not defined. */
10216
10217 /* List of identifiers for which an extern declaration might have to be
10218 emitted. */
10219 /* FIXME: needs to use GC, so it can be saved and restored for PCH. */
10220
10221 struct unicosmk_extern_list
10222 {
10223 struct unicosmk_extern_list *next;
10224 const char *name;
10225 };
10226
10227 static struct unicosmk_extern_list *unicosmk_extern_head = 0;
10228
10229 /* Output extern declarations which are required for every asm file. */
10230
10231 static void
10232 unicosmk_output_default_externs (FILE *file)
10233 {
10234 static const char *const externs[] =
10235 { "__T3E_MISMATCH" };
10236
10237 int i;
10238 int n;
10239
10240 n = ARRAY_SIZE (externs);
10241
10242 for (i = 0; i < n; i++)
10243 fprintf (file, "\t.extern\t%s\n", externs[i]);
10244 }
10245
10246 /* Output extern declarations for global symbols which have been
10247 referenced but not defined. */
10248
10249 static void
10250 unicosmk_output_externs (FILE *file)
10251 {
10252 struct unicosmk_extern_list *p;
10253 const char *real_name;
10254 int len;
10255 tree name_tree;
10256
10257 len = strlen (user_label_prefix);
10258 for (p = unicosmk_extern_head; p != 0; p = p->next)
10259 {
10260 /* We have to strip the encoding and possibly remove user_label_prefix
10261 from the identifier in order to handle -fleading-underscore and
10262 explicit asm names correctly (cf. gcc.dg/asm-names-1.c). */
10263 real_name = default_strip_name_encoding (p->name);
10264 if (len && p->name[0] == '*'
10265 && !memcmp (real_name, user_label_prefix, len))
10266 real_name += len;
10267
10268 name_tree = get_identifier (real_name);
10269 if (! TREE_ASM_WRITTEN (name_tree))
10270 {
10271 TREE_ASM_WRITTEN (name_tree) = 1;
10272 fputs ("\t.extern\t", file);
10273 assemble_name (file, p->name);
10274 putc ('\n', file);
10275 }
10276 }
10277 }
10278
10279 /* Record an extern. */
10280
10281 void
10282 unicosmk_add_extern (const char *name)
10283 {
10284 struct unicosmk_extern_list *p;
10285
10286 p = (struct unicosmk_extern_list *)
10287 xmalloc (sizeof (struct unicosmk_extern_list));
10288 p->next = unicosmk_extern_head;
10289 p->name = name;
10290 unicosmk_extern_head = p;
10291 }
10292
10293 /* The Cray assembler generates incorrect code if identifiers which
10294 conflict with register names are used as instruction operands. We have
10295 to replace such identifiers with DEX expressions. */
10296
10297 /* Structure to collect identifiers which have been replaced by DEX
10298 expressions. */
10299 /* FIXME: needs to use GC, so it can be saved and restored for PCH. */
10300
10301 struct unicosmk_dex {
10302 struct unicosmk_dex *next;
10303 const char *name;
10304 };
10305
10306 /* List of identifiers which have been replaced by DEX expressions. The DEX
10307 number is determined by the position in the list. */
10308
10309 static struct unicosmk_dex *unicosmk_dex_list = NULL;
10310
10311 /* The number of elements in the DEX list. */
10312
10313 static int unicosmk_dex_count = 0;
10314
10315 /* Check if NAME must be replaced by a DEX expression. */
10316
10317 static int
10318 unicosmk_special_name (const char *name)
10319 {
10320 if (name[0] == '*')
10321 ++name;
10322
10323 if (name[0] == '$')
10324 ++name;
10325
10326 if (name[0] != 'r' && name[0] != 'f' && name[0] != 'R' && name[0] != 'F')
10327 return 0;
10328
10329 switch (name[1])
10330 {
10331 case '1': case '2':
10332 return (name[2] == '\0' || (ISDIGIT (name[2]) && name[3] == '\0'));
10333
10334 case '3':
10335 return (name[2] == '\0'
10336 || ((name[2] == '0' || name[2] == '1') && name[3] == '\0'));
10337
10338 default:
10339 return (ISDIGIT (name[1]) && name[2] == '\0');
10340 }
10341 }
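
/* Examples: "r0" through "r31" and "f0" through "f31", optionally
preceded by '*' or '$' and possibly capitalized, are special; "r32" and
"foo" are not. */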
10342
10343 /* Return the DEX number if X must be replaced by a DEX expression and 0
10344 otherwise. */
10345
10346 static int
10347 unicosmk_need_dex (rtx x)
10348 {
10349 struct unicosmk_dex *dex;
10350 const char *name;
10351 int i;
10352
10353 if (GET_CODE (x) != SYMBOL_REF)
10354 return 0;
10355
10356 name = XSTR (x, 0);
10357 if (! unicosmk_special_name (name))
10358 return 0;
10359
10360 i = unicosmk_dex_count;
10361 for (dex = unicosmk_dex_list; dex; dex = dex->next)
10362 {
10363 if (! strcmp (name, dex->name))
10364 return i;
10365 --i;
10366 }
10367
10368 dex = (struct unicosmk_dex *) xmalloc (sizeof (struct unicosmk_dex));
10369 dex->name = name;
10370 dex->next = unicosmk_dex_list;
10371 unicosmk_dex_list = dex;
10372
10373 ++unicosmk_dex_count;
10374 return unicosmk_dex_count;
10375 }
10376
10377 /* Output the DEX definitions for this file. */
10378
10379 static void
10380 unicosmk_output_dex (FILE *file)
10381 {
10382 struct unicosmk_dex *dex;
10383 int i;
10384
10385 if (unicosmk_dex_list == NULL)
10386 return;
10387
10388 fprintf (file, "\t.dexstart\n");
10389
10390 i = unicosmk_dex_count;
10391 for (dex = unicosmk_dex_list; dex; dex = dex->next)
10392 {
10393 fprintf (file, "\tDEX (%d) = ", i);
10394 assemble_name (file, dex->name);
10395 putc ('\n', file);
10396 --i;
10397 }
10398
10399 fprintf (file, "\t.dexend\n");
10400 }
10401
10402 /* Output text to appear at the beginning of an assembler file. */
10403
10404 static void
10405 unicosmk_file_start (void)
10406 {
10407 int i;
10408
10409 fputs ("\t.ident\t", asm_out_file);
10410 unicosmk_output_module_name (asm_out_file);
10411 fputs ("\n\n", asm_out_file);
10412
10413 /* The Unicos/Mk assembler uses different register names. Instead of trying
10414 to support them, we simply use micro definitions. */
10415
10416 /* CAM has different register names: rN for the integer register N and fN
10417 for the floating-point register N. Instead of trying to use these in
10418 alpha.md, we define the symbols $N and $fN to refer to the appropriate
10419 register. */
10420
10421 for (i = 0; i < 32; ++i)
10422 fprintf (asm_out_file, "$%d <- r%d\n", i, i);
10423
10424 for (i = 0; i < 32; ++i)
10425 fprintf (asm_out_file, "$f%d <- f%d\n", i, i);
10426
10427 putc ('\n', asm_out_file);
10428
10429 /* The .align directive fills unused space with zeroes, which does not work
10430 in code sections. We define the macro 'gcc@code@align' which uses nops
10431 instead. Note that it assumes that code sections always have the
10432 biggest possible alignment since . refers to the current offset from
10433 the beginning of the section. */
10434
10435 fputs ("\t.macro gcc@code@align n\n", asm_out_file);
10436 fputs ("gcc@n@bytes = 1 << n\n", asm_out_file);
10437 fputs ("gcc@here = . % gcc@n@bytes\n", asm_out_file);
10438 fputs ("\t.if ne, gcc@here, 0\n", asm_out_file);
10439 fputs ("\t.repeat (gcc@n@bytes - gcc@here) / 4\n", asm_out_file);
10440 fputs ("\tbis r31,r31,r31\n", asm_out_file);
10441 fputs ("\t.endr\n", asm_out_file);
10442 fputs ("\t.endif\n", asm_out_file);
10443 fputs ("\t.endm gcc@code@align\n\n", asm_out_file);
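
/* A worked expansion of the macro, assuming the current section offset
'.' is 12: "gcc@code@align 4" computes gcc@n@bytes = 16 and
gcc@here = 12, so the .repeat emits (16 - 12) / 4 = 1 nop
(bis r31,r31,r31) to reach the 16-byte boundary. */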
10444
10445 /* Output extern declarations which should always be visible. */
10446 unicosmk_output_default_externs (asm_out_file);
10447
10448 /* Open a dummy section. We always need to be inside a section for the
10449 section-switching code to work correctly.
10450 ??? This should be a module id or something like that. I still have to
10451 figure out what the rules for those are. */
10452 fputs ("\n\t.psect\t$SG00000,data\n", asm_out_file);
10453 }
10454
10455 /* Output text to appear at the end of an assembler file. This includes all
10456 pending extern declarations and DEX expressions. */
10457
10458 static void
10459 unicosmk_file_end (void)
10460 {
10461 fputs ("\t.endp\n\n", asm_out_file);
10462
10463 /* Output all pending externs. */
10464
10465 unicosmk_output_externs (asm_out_file);
10466
10467 /* Output dex definitions used for functions whose names conflict with
10468 register names. */
10469
10470 unicosmk_output_dex (asm_out_file);
10471
10472 fputs ("\t.end\t", asm_out_file);
10473 unicosmk_output_module_name (asm_out_file);
10474 putc ('\n', asm_out_file);
10475 }
10476
10477 #else
10478
10479 static void
10480 unicosmk_output_deferred_case_vectors (FILE *file ATTRIBUTE_UNUSED)
10481 {}
10482
10483 static void
10484 unicosmk_gen_dsib (unsigned long *imaskP ATTRIBUTE_UNUSED)
10485 {}
10486
10487 static void
10488 unicosmk_output_ssib (FILE * file ATTRIBUTE_UNUSED,
10489 const char * fnname ATTRIBUTE_UNUSED)
10490 {}
10491
10492 rtx
10493 unicosmk_add_call_info_word (rtx x ATTRIBUTE_UNUSED)
10494 {
10495 return NULL_RTX;
10496 }
10497
10498 static int
10499 unicosmk_need_dex (rtx x ATTRIBUTE_UNUSED)
10500 {
10501 return 0;
10502 }
10503
10504 #endif /* TARGET_ABI_UNICOSMK */
10505
10506 static void
10507 alpha_init_libfuncs (void)
10508 {
10509 if (TARGET_ABI_UNICOSMK)
10510 {
10511 /* Prevent gcc from generating calls to __divsi3. */
10512 set_optab_libfunc (sdiv_optab, SImode, 0);
10513 set_optab_libfunc (udiv_optab, SImode, 0);
10514
10515 /* Use the functions provided by the system library
10516 for DImode integer division. */
10517 set_optab_libfunc (sdiv_optab, DImode, "$sldiv");
10518 set_optab_libfunc (udiv_optab, DImode, "$uldiv");
10519 }
10520 else if (TARGET_ABI_OPEN_VMS)
10521 {
10522 /* Use the VMS runtime library functions for division and
10523 remainder. */
10524 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10525 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10526 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10527 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10528 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10529 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10530 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10531 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10532 }
10533 }
10534
10535 \f
10536 /* Initialize the GCC target structure. */
10537 #if TARGET_ABI_OPEN_VMS
10538 # undef TARGET_ATTRIBUTE_TABLE
10539 # define TARGET_ATTRIBUTE_TABLE vms_attribute_table
10540 # undef TARGET_SECTION_TYPE_FLAGS
10541 # define TARGET_SECTION_TYPE_FLAGS vms_section_type_flags
10542 #endif
10543
10544 #undef TARGET_IN_SMALL_DATA_P
10545 #define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
10546
10547 #if TARGET_ABI_UNICOSMK
10548 # undef TARGET_INSERT_ATTRIBUTES
10549 # define TARGET_INSERT_ATTRIBUTES unicosmk_insert_attributes
10550 # undef TARGET_SECTION_TYPE_FLAGS
10551 # define TARGET_SECTION_TYPE_FLAGS unicosmk_section_type_flags
10552 # undef TARGET_ASM_UNIQUE_SECTION
10553 # define TARGET_ASM_UNIQUE_SECTION unicosmk_unique_section
10554 #undef TARGET_ASM_FUNCTION_RODATA_SECTION
10555 #define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
10556 # undef TARGET_ASM_GLOBALIZE_LABEL
10557 # define TARGET_ASM_GLOBALIZE_LABEL hook_void_FILEptr_constcharptr
10558 # undef TARGET_MUST_PASS_IN_STACK
10559 # define TARGET_MUST_PASS_IN_STACK unicosmk_must_pass_in_stack
10560 #endif
10561
10562 #undef TARGET_ASM_ALIGNED_HI_OP
10563 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
10564 #undef TARGET_ASM_ALIGNED_DI_OP
10565 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
10566
10567 /* Default unaligned ops are provided for ELF systems. To get unaligned
10568 data for non-ELF systems, we have to turn off auto alignment. */
10569 #ifndef OBJECT_FORMAT_ELF
10570 #undef TARGET_ASM_UNALIGNED_HI_OP
10571 #define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
10572 #undef TARGET_ASM_UNALIGNED_SI_OP
10573 #define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
10574 #undef TARGET_ASM_UNALIGNED_DI_OP
10575 #define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
10576 #endif
10577
10578 #ifdef OBJECT_FORMAT_ELF
10579 #undef TARGET_ASM_RELOC_RW_MASK
10580 #define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask
10581 #undef TARGET_ASM_SELECT_RTX_SECTION
10582 #define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section
10583 #endif
10584
10585 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
10586 #define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
10587
10588 #undef TARGET_INIT_LIBFUNCS
10589 #define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
10590
10591 #if TARGET_ABI_UNICOSMK
10592 #undef TARGET_ASM_FILE_START
10593 #define TARGET_ASM_FILE_START unicosmk_file_start
10594 #undef TARGET_ASM_FILE_END
10595 #define TARGET_ASM_FILE_END unicosmk_file_end
10596 #else
10597 #undef TARGET_ASM_FILE_START
10598 #define TARGET_ASM_FILE_START alpha_file_start
10599 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
10600 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
10601 #endif
10602
10603 #undef TARGET_SCHED_ADJUST_COST
10604 #define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
10605 #undef TARGET_SCHED_ISSUE_RATE
10606 #define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
10607 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
10608 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
10609 alpha_multipass_dfa_lookahead
10610
10611 #undef TARGET_HAVE_TLS
10612 #define TARGET_HAVE_TLS HAVE_AS_TLS
10613
10614 #undef TARGET_INIT_BUILTINS
10615 #define TARGET_INIT_BUILTINS alpha_init_builtins
10616 #undef TARGET_EXPAND_BUILTIN
10617 #define TARGET_EXPAND_BUILTIN alpha_expand_builtin
10618 #undef TARGET_FOLD_BUILTIN
10619 #define TARGET_FOLD_BUILTIN alpha_fold_builtin
10620
10621 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
10622 #define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
10623 #undef TARGET_CANNOT_COPY_INSN_P
10624 #define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
10625 #undef TARGET_CANNOT_FORCE_CONST_MEM
10626 #define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
10627
10628 #if TARGET_ABI_OSF
10629 #undef TARGET_ASM_OUTPUT_MI_THUNK
10630 #define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
10631 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10632 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
10633 #undef TARGET_STDARG_OPTIMIZE_HOOK
10634 #define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
10635 #endif
10636
10637 #undef TARGET_RTX_COSTS
10638 #define TARGET_RTX_COSTS alpha_rtx_costs
10639 #undef TARGET_ADDRESS_COST
10640 #define TARGET_ADDRESS_COST hook_int_rtx_0
10641
10642 #undef TARGET_MACHINE_DEPENDENT_REORG
10643 #define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
10644
10645 #undef TARGET_PROMOTE_FUNCTION_ARGS
10646 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
10647 #undef TARGET_PROMOTE_FUNCTION_RETURN
10648 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
10649 #undef TARGET_PROMOTE_PROTOTYPES
10650 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_false
10651 #undef TARGET_RETURN_IN_MEMORY
10652 #define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
10653 #undef TARGET_PASS_BY_REFERENCE
10654 #define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
10655 #undef TARGET_SETUP_INCOMING_VARARGS
10656 #define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
10657 #undef TARGET_STRICT_ARGUMENT_NAMING
10658 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10659 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
10660 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
10661 #undef TARGET_SPLIT_COMPLEX_ARG
10662 #define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
10663 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
10664 #define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
10665 #undef TARGET_ARG_PARTIAL_BYTES
10666 #define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
10667
10668 #undef TARGET_SCALAR_MODE_SUPPORTED_P
10669 #define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
10670 #undef TARGET_VECTOR_MODE_SUPPORTED_P
10671 #define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
10672
10673 #undef TARGET_BUILD_BUILTIN_VA_LIST
10674 #define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
10675
10676 /* The Alpha architecture does not require sequential consistency. See
10677 http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html
10678 for an example of how it can be violated in practice. */
10679 #undef TARGET_RELAXED_ORDERING
10680 #define TARGET_RELAXED_ORDERING true
10681
10682 #undef TARGET_DEFAULT_TARGET_FLAGS
10683 #define TARGET_DEFAULT_TARGET_FLAGS \
10684 (TARGET_DEFAULT | TARGET_CPU_DEFAULT | TARGET_DEFAULT_EXPLICIT_RELOCS)
10685 #undef TARGET_HANDLE_OPTION
10686 #define TARGET_HANDLE_OPTION alpha_handle_option
10687
10688 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
10689 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
10690 #define TARGET_MANGLE_FUNDAMENTAL_TYPE alpha_mangle_fundamental_type
10691 #endif
10692
10693 struct gcc_target targetm = TARGET_INITIALIZER;
10694
10695 \f
10696 #include "gt-alpha.h"