alpha.c (alpha_start_function): Adjust condition to create VMS trampoline entry point.
[gcc.git] / gcc / config / alpha / alpha.c
1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "reload.h"
41 #include "obstack.h"
42 #include "except.h"
43 #include "function.h"
44 #include "diagnostic-core.h"
45 #include "ggc.h"
46 #include "integrate.h"
47 #include "tm_p.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "common/common-target.h"
51 #include "debug.h"
52 #include "langhooks.h"
53 #include "splay-tree.h"
54 #include "cfglayout.h"
55 #include "gimple.h"
56 #include "tree-flow.h"
57 #include "tree-stdarg.h"
58 #include "tm-constrs.h"
59 #include "df.h"
60 #include "libfuncs.h"
61 #include "opts.h"
62
63 /* Specify which cpu to schedule for. */
64 enum processor_type alpha_tune;
65
66 /* Which cpu we're generating code for. */
67 enum processor_type alpha_cpu;
68
69 static const char * const alpha_cpu_name[] =
70 {
71 "ev4", "ev5", "ev6"
72 };
73
74 /* Specify how accurate floating-point traps need to be. */
75
76 enum alpha_trap_precision alpha_tp;
77
78 /* Specify the floating-point rounding mode. */
79
80 enum alpha_fp_rounding_mode alpha_fprm;
81
82 /* Specify which things cause traps. */
83
84 enum alpha_fp_trap_mode alpha_fptm;
85
86 /* Nonzero if inside of a function, because the Alpha asm can't
87 handle .files inside of functions. */
88
89 static int inside_function = FALSE;
90
91 /* The number of cycles of latency we should assume on memory reads. */
92
93 int alpha_memory_latency = 3;
94
95 /* Whether the function needs the GP. */
96
97 static int alpha_function_needs_gp;
98
99 /* The assembler name of the current function. */
100
101 static const char *alpha_fnname;
102
103 /* The next explicit relocation sequence number. */
104 extern GTY(()) int alpha_next_sequence_number;
105 int alpha_next_sequence_number = 1;
106
107 /* The literal and gpdisp sequence numbers for this insn, as printed
108 by %# and %* respectively. */
109 extern GTY(()) int alpha_this_literal_sequence_number;
110 extern GTY(()) int alpha_this_gpdisp_sequence_number;
111 int alpha_this_literal_sequence_number;
112 int alpha_this_gpdisp_sequence_number;
113
114 /* Costs of various operations on the different architectures. */
115
116 struct alpha_rtx_cost_data
117 {
118 unsigned char fp_add;
119 unsigned char fp_mult;
120 unsigned char fp_div_sf;
121 unsigned char fp_div_df;
122 unsigned char int_mult_si;
123 unsigned char int_mult_di;
124 unsigned char int_shift;
125 unsigned char int_cmov;
126 unsigned short int_div;
127 };
128
129 static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
130 {
131 { /* EV4 */
132 COSTS_N_INSNS (6), /* fp_add */
133 COSTS_N_INSNS (6), /* fp_mult */
134 COSTS_N_INSNS (34), /* fp_div_sf */
135 COSTS_N_INSNS (63), /* fp_div_df */
136 COSTS_N_INSNS (23), /* int_mult_si */
137 COSTS_N_INSNS (23), /* int_mult_di */
138 COSTS_N_INSNS (2), /* int_shift */
139 COSTS_N_INSNS (2), /* int_cmov */
140 COSTS_N_INSNS (97), /* int_div */
141 },
142 { /* EV5 */
143 COSTS_N_INSNS (4), /* fp_add */
144 COSTS_N_INSNS (4), /* fp_mult */
145 COSTS_N_INSNS (15), /* fp_div_sf */
146 COSTS_N_INSNS (22), /* fp_div_df */
147 COSTS_N_INSNS (8), /* int_mult_si */
148 COSTS_N_INSNS (12), /* int_mult_di */
149 COSTS_N_INSNS (1) + 1, /* int_shift */
150 COSTS_N_INSNS (1), /* int_cmov */
151 COSTS_N_INSNS (83), /* int_div */
152 },
153 { /* EV6 */
154 COSTS_N_INSNS (4), /* fp_add */
155 COSTS_N_INSNS (4), /* fp_mult */
156 COSTS_N_INSNS (12), /* fp_div_sf */
157 COSTS_N_INSNS (15), /* fp_div_df */
158 COSTS_N_INSNS (7), /* int_mult_si */
159 COSTS_N_INSNS (7), /* int_mult_di */
160 COSTS_N_INSNS (1), /* int_shift */
161 COSTS_N_INSNS (2), /* int_cmov */
162 COSTS_N_INSNS (86), /* int_div */
163 },
164 };
165
166 /* Similar but tuned for code size instead of execution latency. The
167 extra +N is fractional cost tuning based on latency. It's used to
168 encourage use of cheaper insns like shift, but only if there's just
169 one of them. */
170
171 static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
172 {
173 COSTS_N_INSNS (1), /* fp_add */
174 COSTS_N_INSNS (1), /* fp_mult */
175 COSTS_N_INSNS (1), /* fp_div_sf */
176 COSTS_N_INSNS (1) + 1, /* fp_div_df */
177 COSTS_N_INSNS (1) + 1, /* int_mult_si */
178 COSTS_N_INSNS (1) + 2, /* int_mult_di */
179 COSTS_N_INSNS (1), /* int_shift */
180 COSTS_N_INSNS (1), /* int_cmov */
181 COSTS_N_INSNS (6), /* int_div */
182 };
183
184 /* Get the number of args of a function in one of two ways. */
185 #if TARGET_ABI_OPEN_VMS
186 #define NUM_ARGS crtl->args.info.num_args
187 #else
188 #define NUM_ARGS crtl->args.info
189 #endif
190
191 #define REG_PV 27
192 #define REG_RA 26
193
194 /* Declarations of static functions. */
195 static struct machine_function *alpha_init_machine_status (void);
196 static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
197
198 #if TARGET_ABI_OPEN_VMS
199 static void alpha_write_linkage (FILE *, const char *);
200 static bool vms_valid_pointer_mode (enum machine_mode);
201 #else
202 #define vms_patch_builtins() gcc_unreachable()
203 #endif
204 \f
205 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
206 /* Implement TARGET_MANGLE_TYPE. */
207
208 static const char *
209 alpha_mangle_type (const_tree type)
210 {
211 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
212 && TARGET_LONG_DOUBLE_128)
213 return "g";
214
215 /* For all other types, use normal C++ mangling. */
216 return NULL;
217 }
218 #endif
219
220 /* Parse target option strings. */
221
222 static void
223 alpha_option_override (void)
224 {
225 static const struct cpu_table {
226 const char *const name;
227 const enum processor_type processor;
228 const int flags;
229 } cpu_table[] = {
230 { "ev4", PROCESSOR_EV4, 0 },
231 { "ev45", PROCESSOR_EV4, 0 },
232 { "21064", PROCESSOR_EV4, 0 },
233 { "ev5", PROCESSOR_EV5, 0 },
234 { "21164", PROCESSOR_EV5, 0 },
235 { "ev56", PROCESSOR_EV5, MASK_BWX },
236 { "21164a", PROCESSOR_EV5, MASK_BWX },
237 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX },
238 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
239 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
240 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
241 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
242 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX },
243 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX }
244 };
245
246 int const ct_size = ARRAY_SIZE (cpu_table);
247 int i;
248
249 #ifdef SUBTARGET_OVERRIDE_OPTIONS
250 SUBTARGET_OVERRIDE_OPTIONS;
251 #endif
252
253 alpha_fprm = ALPHA_FPRM_NORM;
254 alpha_tp = ALPHA_TP_PROG;
255 alpha_fptm = ALPHA_FPTM_N;
256
257 if (TARGET_IEEE)
258 {
259 alpha_tp = ALPHA_TP_INSN;
260 alpha_fptm = ALPHA_FPTM_SU;
261 }
262 if (TARGET_IEEE_WITH_INEXACT)
263 {
264 alpha_tp = ALPHA_TP_INSN;
265 alpha_fptm = ALPHA_FPTM_SUI;
266 }
267
268 if (alpha_tp_string)
269 {
270 if (! strcmp (alpha_tp_string, "p"))
271 alpha_tp = ALPHA_TP_PROG;
272 else if (! strcmp (alpha_tp_string, "f"))
273 alpha_tp = ALPHA_TP_FUNC;
274 else if (! strcmp (alpha_tp_string, "i"))
275 alpha_tp = ALPHA_TP_INSN;
276 else
277 error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
278 }
279
280 if (alpha_fprm_string)
281 {
282 if (! strcmp (alpha_fprm_string, "n"))
283 alpha_fprm = ALPHA_FPRM_NORM;
284 else if (! strcmp (alpha_fprm_string, "m"))
285 alpha_fprm = ALPHA_FPRM_MINF;
286 else if (! strcmp (alpha_fprm_string, "c"))
287 alpha_fprm = ALPHA_FPRM_CHOP;
288 else if (! strcmp (alpha_fprm_string,"d"))
289 alpha_fprm = ALPHA_FPRM_DYN;
290 else
291 error ("bad value %qs for -mfp-rounding-mode switch",
292 alpha_fprm_string);
293 }
294
295 if (alpha_fptm_string)
296 {
297 if (strcmp (alpha_fptm_string, "n") == 0)
298 alpha_fptm = ALPHA_FPTM_N;
299 else if (strcmp (alpha_fptm_string, "u") == 0)
300 alpha_fptm = ALPHA_FPTM_U;
301 else if (strcmp (alpha_fptm_string, "su") == 0)
302 alpha_fptm = ALPHA_FPTM_SU;
303 else if (strcmp (alpha_fptm_string, "sui") == 0)
304 alpha_fptm = ALPHA_FPTM_SUI;
305 else
306 error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
307 }
308
309 if (alpha_cpu_string)
310 {
311 for (i = 0; i < ct_size; i++)
312 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
313 {
314 alpha_tune = alpha_cpu = cpu_table [i].processor;
315 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
316 target_flags |= cpu_table [i].flags;
317 break;
318 }
319 if (i == ct_size)
320 error ("bad value %qs for -mcpu switch", alpha_cpu_string);
321 }
322
323 if (alpha_tune_string)
324 {
325 for (i = 0; i < ct_size; i++)
326 if (! strcmp (alpha_tune_string, cpu_table [i].name))
327 {
328 alpha_tune = cpu_table [i].processor;
329 break;
330 }
331 if (i == ct_size)
332 error ("bad value %qs for -mtune switch", alpha_tune_string);
333 }
334
335 /* Do some sanity checks on the above options. */
336
337 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
338 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
339 {
340 warning (0, "fp software completion requires -mtrap-precision=i");
341 alpha_tp = ALPHA_TP_INSN;
342 }
343
344 if (alpha_cpu == PROCESSOR_EV6)
345 {
346 /* Except for EV6 pass 1 (not released), we always have precise
347 arithmetic traps. Which means we can do software completion
348 without minding trap shadows. */
349 alpha_tp = ALPHA_TP_PROG;
350 }
351
352 if (TARGET_FLOAT_VAX)
353 {
354 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
355 {
356 warning (0, "rounding mode not supported for VAX floats");
357 alpha_fprm = ALPHA_FPRM_NORM;
358 }
359 if (alpha_fptm == ALPHA_FPTM_SUI)
360 {
361 warning (0, "trap mode not supported for VAX floats");
362 alpha_fptm = ALPHA_FPTM_SU;
363 }
364 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
365 warning (0, "128-bit long double not supported for VAX floats");
366 target_flags &= ~MASK_LONG_DOUBLE_128;
367 }
368
369 {
370 char *end;
371 int lat;
372
373 if (!alpha_mlat_string)
374 alpha_mlat_string = "L1";
375
376 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
377 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
378 ;
379 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
380 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
381 && alpha_mlat_string[2] == '\0')
382 {
383 static int const cache_latency[][4] =
384 {
385 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
386 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
387 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
388 };
389
390 lat = alpha_mlat_string[1] - '0';
391 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
392 {
393 warning (0, "L%d cache latency unknown for %s",
394 lat, alpha_cpu_name[alpha_tune]);
395 lat = 3;
396 }
397 else
398 lat = cache_latency[alpha_tune][lat-1];
399 }
400 else if (! strcmp (alpha_mlat_string, "main"))
401 {
402 /* Most current memories have about 370ns latency. This is
403 a reasonable guess for a fast cpu. */
404 lat = 150;
405 }
406 else
407 {
408 warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
409 lat = 3;
410 }
411
412 alpha_memory_latency = lat;
413 }
414
415 /* Default the definition of "small data" to 8 bytes. */
416 if (!global_options_set.x_g_switch_value)
417 g_switch_value = 8;
418
419 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
420 if (flag_pic == 1)
421 target_flags |= MASK_SMALL_DATA;
422 else if (flag_pic == 2)
423 target_flags &= ~MASK_SMALL_DATA;
424
425 /* Align labels and loops for optimal branching. */
426 /* ??? Kludge these by not doing anything if we don't optimize and also if
427 we are writing ECOFF symbols to work around a bug in DEC's assembler. */
428 if (optimize > 0 && write_symbols != SDB_DEBUG)
429 {
430 if (align_loops <= 0)
431 align_loops = 16;
432 if (align_jumps <= 0)
433 align_jumps = 16;
434 }
435 if (align_functions <= 0)
436 align_functions = 16;
437
438 /* Register variables and functions with the garbage collector. */
439
440 /* Set up function hooks. */
441 init_machine_status = alpha_init_machine_status;
442
443 /* Tell the compiler when we're using VAX floating point. */
444 if (TARGET_FLOAT_VAX)
445 {
446 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
447 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
448 REAL_MODE_FORMAT (TFmode) = NULL;
449 }
450
451 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
452 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
453 target_flags |= MASK_LONG_DOUBLE_128;
454 #endif
455 }
456 \f
457 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
458
459 int
460 zap_mask (HOST_WIDE_INT value)
461 {
462 int i;
463
464 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
465 i++, value >>= 8)
466 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
467 return 0;
468
469 return 1;
470 }
471
472 /* Return true if OP is valid for a particular TLS relocation.
473 We are already guaranteed that OP is a CONST. */
474
475 int
476 tls_symbolic_operand_1 (rtx op, int size, int unspec)
477 {
478 op = XEXP (op, 0);
479
480 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
481 return 0;
482 op = XVECEXP (op, 0, 0);
483
484 if (GET_CODE (op) != SYMBOL_REF)
485 return 0;
486
487 switch (SYMBOL_REF_TLS_MODEL (op))
488 {
489 case TLS_MODEL_LOCAL_DYNAMIC:
490 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
491 case TLS_MODEL_INITIAL_EXEC:
492 return unspec == UNSPEC_TPREL && size == 64;
493 case TLS_MODEL_LOCAL_EXEC:
494 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
495 default:
496 gcc_unreachable ();
497 }
498 }
499
500 /* Used by aligned_memory_operand and unaligned_memory_operand to
501 resolve what reload is going to do with OP if it's a register. */
502
503 rtx
504 resolve_reload_operand (rtx op)
505 {
506 if (reload_in_progress)
507 {
508 rtx tmp = op;
509 if (GET_CODE (tmp) == SUBREG)
510 tmp = SUBREG_REG (tmp);
511 if (REG_P (tmp)
512 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
513 {
514 op = reg_equiv_memory_loc (REGNO (tmp));
515 if (op == 0)
516 return 0;
517 }
518 }
519 return op;
520 }
521
522 /* The scalar modes supported differs from the default check-what-c-supports
523 version in that sometimes TFmode is available even when long double
524 indicates only DFmode. */
525
526 static bool
527 alpha_scalar_mode_supported_p (enum machine_mode mode)
528 {
529 switch (mode)
530 {
531 case QImode:
532 case HImode:
533 case SImode:
534 case DImode:
535 case TImode: /* via optabs.c */
536 return true;
537
538 case SFmode:
539 case DFmode:
540 return true;
541
542 case TFmode:
543 return TARGET_HAS_XFLOATING_LIBS;
544
545 default:
546 return false;
547 }
548 }
549
550 /* Alpha implements a couple of integer vector mode operations when
551 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
552 which allows the vectorizer to operate on e.g. move instructions,
553 or when expand_vector_operations can do something useful. */
554
555 static bool
556 alpha_vector_mode_supported_p (enum machine_mode mode)
557 {
558 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
559 }
560
561 /* Return 1 if this function can directly return via $26. */
562
563 int
564 direct_return (void)
565 {
566 return (TARGET_ABI_OSF
567 && reload_completed
568 && alpha_sa_size () == 0
569 && get_frame_size () == 0
570 && crtl->outgoing_args_size == 0
571 && crtl->args.pretend_args_size == 0);
572 }
573
574 /* Return the TLS model to use for SYMBOL. */
575
576 static enum tls_model
577 tls_symbolic_operand_type (rtx symbol)
578 {
579 enum tls_model model;
580
581 if (GET_CODE (symbol) != SYMBOL_REF)
582 return TLS_MODEL_NONE;
583 model = SYMBOL_REF_TLS_MODEL (symbol);
584
585 /* Local-exec with a 64-bit size is the same code as initial-exec. */
586 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
587 model = TLS_MODEL_INITIAL_EXEC;
588
589 return model;
590 }
591 \f
592 /* Return true if the function DECL will share the same GP as any
593 function in the current unit of translation. */
594
595 static bool
596 decl_has_samegp (const_tree decl)
597 {
598 /* Functions that are not local can be overridden, and thus may
599 not share the same gp. */
600 if (!(*targetm.binds_local_p) (decl))
601 return false;
602
603 /* If -msmall-data is in effect, assume that there is only one GP
604 for the module, and so any local symbol has this property. We
605 need explicit relocations to be able to enforce this for symbols
606 not defined in this unit of translation, however. */
607 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
608 return true;
609
610 /* Functions that are not external are defined in this UoT. */
611 /* ??? Irritatingly, static functions not yet emitted are still
612 marked "external". Apply this to non-static functions only. */
613 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
614 }
615
616 /* Return true if EXP should be placed in the small data section. */
617
618 static bool
619 alpha_in_small_data_p (const_tree exp)
620 {
621 /* We want to merge strings, so we never consider them small data. */
622 if (TREE_CODE (exp) == STRING_CST)
623 return false;
624
625 /* Functions are never in the small data area. Duh. */
626 if (TREE_CODE (exp) == FUNCTION_DECL)
627 return false;
628
629 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
630 {
631 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
632 if (strcmp (section, ".sdata") == 0
633 || strcmp (section, ".sbss") == 0)
634 return true;
635 }
636 else
637 {
638 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
639
640 /* If this is an incomplete type with size 0, then we can't put it
641 in sdata because it might be too big when completed. */
642 if (size > 0 && size <= g_switch_value)
643 return true;
644 }
645
646 return false;
647 }
648
649 #if TARGET_ABI_OPEN_VMS
650 static bool
651 vms_valid_pointer_mode (enum machine_mode mode)
652 {
653 return (mode == SImode || mode == DImode);
654 }
655
656 static bool
657 alpha_linkage_symbol_p (const char *symname)
658 {
659 int symlen = strlen (symname);
660
661 if (symlen > 4)
662 return strcmp (&symname [symlen - 4], "..lk") == 0;
663
664 return false;
665 }
666
667 #define LINKAGE_SYMBOL_REF_P(X) \
668 ((GET_CODE (X) == SYMBOL_REF \
669 && alpha_linkage_symbol_p (XSTR (X, 0))) \
670 || (GET_CODE (X) == CONST \
671 && GET_CODE (XEXP (X, 0)) == PLUS \
672 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
673 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
674 #endif
675
676 /* legitimate_address_p recognizes an RTL expression that is a valid
677 memory address for an instruction. The MODE argument is the
678 machine mode for the MEM expression that wants to use this address.
679
680 For Alpha, we have either a constant address or the sum of a
681 register and a constant address, or just a register. For DImode,
682 any of those forms can be surrounded with an AND that clear the
683 low-order three bits; this is an "unaligned" access. */
684
685 static bool
686 alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
687 {
688 /* If this is an ldq_u type address, discard the outer AND. */
689 if (mode == DImode
690 && GET_CODE (x) == AND
691 && CONST_INT_P (XEXP (x, 1))
692 && INTVAL (XEXP (x, 1)) == -8)
693 x = XEXP (x, 0);
694
695 /* Discard non-paradoxical subregs. */
696 if (GET_CODE (x) == SUBREG
697 && (GET_MODE_SIZE (GET_MODE (x))
698 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
699 x = SUBREG_REG (x);
700
701 /* Unadorned general registers are valid. */
702 if (REG_P (x)
703 && (strict
704 ? STRICT_REG_OK_FOR_BASE_P (x)
705 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
706 return true;
707
708 /* Constant addresses (i.e. +/- 32k) are valid. */
709 if (CONSTANT_ADDRESS_P (x))
710 return true;
711
712 #if TARGET_ABI_OPEN_VMS
713 if (LINKAGE_SYMBOL_REF_P (x))
714 return true;
715 #endif
716
717 /* Register plus a small constant offset is valid. */
718 if (GET_CODE (x) == PLUS)
719 {
720 rtx ofs = XEXP (x, 1);
721 x = XEXP (x, 0);
722
723 /* Discard non-paradoxical subregs. */
724 if (GET_CODE (x) == SUBREG
725 && (GET_MODE_SIZE (GET_MODE (x))
726 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
727 x = SUBREG_REG (x);
728
729 if (REG_P (x))
730 {
731 if (! strict
732 && NONSTRICT_REG_OK_FP_BASE_P (x)
733 && CONST_INT_P (ofs))
734 return true;
735 if ((strict
736 ? STRICT_REG_OK_FOR_BASE_P (x)
737 : NONSTRICT_REG_OK_FOR_BASE_P (x))
738 && CONSTANT_ADDRESS_P (ofs))
739 return true;
740 }
741 }
742
743 /* If we're managing explicit relocations, LO_SUM is valid, as are small
744 data symbols. Avoid explicit relocations of modes larger than word
745 mode since i.e. $LC0+8($1) can fold around +/- 32k offset. */
746 else if (TARGET_EXPLICIT_RELOCS
747 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
748 {
749 if (small_symbolic_operand (x, Pmode))
750 return true;
751
752 if (GET_CODE (x) == LO_SUM)
753 {
754 rtx ofs = XEXP (x, 1);
755 x = XEXP (x, 0);
756
757 /* Discard non-paradoxical subregs. */
758 if (GET_CODE (x) == SUBREG
759 && (GET_MODE_SIZE (GET_MODE (x))
760 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
761 x = SUBREG_REG (x);
762
763 /* Must have a valid base register. */
764 if (! (REG_P (x)
765 && (strict
766 ? STRICT_REG_OK_FOR_BASE_P (x)
767 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
768 return false;
769
770 /* The symbol must be local. */
771 if (local_symbolic_operand (ofs, Pmode)
772 || dtp32_symbolic_operand (ofs, Pmode)
773 || tp32_symbolic_operand (ofs, Pmode))
774 return true;
775 }
776 }
777
778 return false;
779 }
780
781 /* Build the SYMBOL_REF for __tls_get_addr. */
782
783 static GTY(()) rtx tls_get_addr_libfunc;
784
785 static rtx
786 get_tls_get_addr (void)
787 {
788 if (!tls_get_addr_libfunc)
789 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
790 return tls_get_addr_libfunc;
791 }
792
793 /* Try machine-dependent ways of modifying an illegitimate address
794 to be legitimate. If we find one, return the new, valid address. */
795
796 static rtx
797 alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode)
798 {
799 HOST_WIDE_INT addend;
800
801 /* If the address is (plus reg const_int) and the CONST_INT is not a
802 valid offset, compute the high part of the constant and add it to
803 the register. Then our address is (plus temp low-part-const). */
804 if (GET_CODE (x) == PLUS
805 && REG_P (XEXP (x, 0))
806 && CONST_INT_P (XEXP (x, 1))
807 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
808 {
809 addend = INTVAL (XEXP (x, 1));
810 x = XEXP (x, 0);
811 goto split_addend;
812 }
813
814 /* If the address is (const (plus FOO const_int)), find the low-order
815 part of the CONST_INT. Then load FOO plus any high-order part of the
816 CONST_INT into a register. Our address is (plus reg low-part-const).
817 This is done to reduce the number of GOT entries. */
818 if (can_create_pseudo_p ()
819 && GET_CODE (x) == CONST
820 && GET_CODE (XEXP (x, 0)) == PLUS
821 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
822 {
823 addend = INTVAL (XEXP (XEXP (x, 0), 1));
824 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
825 goto split_addend;
826 }
827
828 /* If we have a (plus reg const), emit the load as in (2), then add
829 the two registers, and finally generate (plus reg low-part-const) as
830 our address. */
831 if (can_create_pseudo_p ()
832 && GET_CODE (x) == PLUS
833 && REG_P (XEXP (x, 0))
834 && GET_CODE (XEXP (x, 1)) == CONST
835 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
836 && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
837 {
838 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
839 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
840 XEXP (XEXP (XEXP (x, 1), 0), 0),
841 NULL_RTX, 1, OPTAB_LIB_WIDEN);
842 goto split_addend;
843 }
844
845 /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
846 Avoid modes larger than word mode since i.e. $LC0+8($1) can fold
847 around +/- 32k offset. */
848 if (TARGET_EXPLICIT_RELOCS
849 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
850 && symbolic_operand (x, Pmode))
851 {
852 rtx r0, r16, eqv, tga, tp, insn, dest, seq;
853
854 switch (tls_symbolic_operand_type (x))
855 {
856 case TLS_MODEL_NONE:
857 break;
858
859 case TLS_MODEL_GLOBAL_DYNAMIC:
860 start_sequence ();
861
862 r0 = gen_rtx_REG (Pmode, 0);
863 r16 = gen_rtx_REG (Pmode, 16);
864 tga = get_tls_get_addr ();
865 dest = gen_reg_rtx (Pmode);
866 seq = GEN_INT (alpha_next_sequence_number++);
867
868 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
869 insn = gen_call_value_osf_tlsgd (r0, tga, seq);
870 insn = emit_call_insn (insn);
871 RTL_CONST_CALL_P (insn) = 1;
872 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
873
874 insn = get_insns ();
875 end_sequence ();
876
877 emit_libcall_block (insn, dest, r0, x);
878 return dest;
879
880 case TLS_MODEL_LOCAL_DYNAMIC:
881 start_sequence ();
882
883 r0 = gen_rtx_REG (Pmode, 0);
884 r16 = gen_rtx_REG (Pmode, 16);
885 tga = get_tls_get_addr ();
886 scratch = gen_reg_rtx (Pmode);
887 seq = GEN_INT (alpha_next_sequence_number++);
888
889 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
890 insn = gen_call_value_osf_tlsldm (r0, tga, seq);
891 insn = emit_call_insn (insn);
892 RTL_CONST_CALL_P (insn) = 1;
893 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
894
895 insn = get_insns ();
896 end_sequence ();
897
898 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
899 UNSPEC_TLSLDM_CALL);
900 emit_libcall_block (insn, scratch, r0, eqv);
901
902 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
903 eqv = gen_rtx_CONST (Pmode, eqv);
904
905 if (alpha_tls_size == 64)
906 {
907 dest = gen_reg_rtx (Pmode);
908 emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
909 emit_insn (gen_adddi3 (dest, dest, scratch));
910 return dest;
911 }
912 if (alpha_tls_size == 32)
913 {
914 insn = gen_rtx_HIGH (Pmode, eqv);
915 insn = gen_rtx_PLUS (Pmode, scratch, insn);
916 scratch = gen_reg_rtx (Pmode);
917 emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
918 }
919 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
920
921 case TLS_MODEL_INITIAL_EXEC:
922 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
923 eqv = gen_rtx_CONST (Pmode, eqv);
924 tp = gen_reg_rtx (Pmode);
925 scratch = gen_reg_rtx (Pmode);
926 dest = gen_reg_rtx (Pmode);
927
928 emit_insn (gen_load_tp (tp));
929 emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
930 emit_insn (gen_adddi3 (dest, tp, scratch));
931 return dest;
932
933 case TLS_MODEL_LOCAL_EXEC:
934 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
935 eqv = gen_rtx_CONST (Pmode, eqv);
936 tp = gen_reg_rtx (Pmode);
937
938 emit_insn (gen_load_tp (tp));
939 if (alpha_tls_size == 32)
940 {
941 insn = gen_rtx_HIGH (Pmode, eqv);
942 insn = gen_rtx_PLUS (Pmode, tp, insn);
943 tp = gen_reg_rtx (Pmode);
944 emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
945 }
946 return gen_rtx_LO_SUM (Pmode, tp, eqv);
947
948 default:
949 gcc_unreachable ();
950 }
951
952 if (local_symbolic_operand (x, Pmode))
953 {
954 if (small_symbolic_operand (x, Pmode))
955 return x;
956 else
957 {
958 if (can_create_pseudo_p ())
959 scratch = gen_reg_rtx (Pmode);
960 emit_insn (gen_rtx_SET (VOIDmode, scratch,
961 gen_rtx_HIGH (Pmode, x)));
962 return gen_rtx_LO_SUM (Pmode, scratch, x);
963 }
964 }
965 }
966
967 return NULL;
968
969 split_addend:
970 {
971 HOST_WIDE_INT low, high;
972
973 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
974 addend -= low;
975 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
976 addend -= high;
977
978 if (addend)
979 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
980 (!can_create_pseudo_p () ? scratch : NULL_RTX),
981 1, OPTAB_LIB_WIDEN);
982 if (high)
983 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
984 (!can_create_pseudo_p () ? scratch : NULL_RTX),
985 1, OPTAB_LIB_WIDEN);
986
987 return plus_constant (x, low);
988 }
989 }
990
991
992 /* Try machine-dependent ways of modifying an illegitimate address
993 to be legitimate. Return X or the new, valid address. */
994
995 static rtx
996 alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
997 enum machine_mode mode)
998 {
999 rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1000 return new_x ? new_x : x;
1001 }
1002
1003 /* Primarily this is required for TLS symbols, but given that our move
1004 patterns *ought* to be able to handle any symbol at any time, we
1005 should never be spilling symbolic operands to the constant pool, ever. */
1006
1007 static bool
1008 alpha_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1009 {
1010 enum rtx_code code = GET_CODE (x);
1011 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1012 }
1013
1014 /* We do not allow indirect calls to be optimized into sibling calls, nor
1015 can we allow a call to a function with a different GP to be optimized
1016 into a sibcall. */
1017
1018 static bool
1019 alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1020 {
1021 /* Can't do indirect tail calls, since we don't know if the target
1022 uses the same GP. */
1023 if (!decl)
1024 return false;
1025
1026 /* Otherwise, we can make a tail call if the target function shares
1027 the same GP. */
1028 return decl_has_samegp (decl);
1029 }
1030
1031 int
1032 some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED)
1033 {
1034 rtx x = *px;
1035
1036 /* Don't re-split. */
1037 if (GET_CODE (x) == LO_SUM)
1038 return -1;
1039
1040 return small_symbolic_operand (x, Pmode) != 0;
1041 }
1042
1043 static int
1044 split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
1045 {
1046 rtx x = *px;
1047
1048 /* Don't re-split. */
1049 if (GET_CODE (x) == LO_SUM)
1050 return -1;
1051
1052 if (small_symbolic_operand (x, Pmode))
1053 {
1054 x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1055 *px = x;
1056 return -1;
1057 }
1058
1059 return 0;
1060 }
1061
1062 rtx
1063 split_small_symbolic_operand (rtx x)
1064 {
1065 x = copy_insn (x);
1066 for_each_rtx (&x, split_small_symbolic_operand_1, NULL);
1067 return x;
1068 }
1069
1070 /* Indicate that INSN cannot be duplicated. This is true for any insn
1071 that we've marked with gpdisp relocs, since those have to stay in
1072 1-1 correspondence with one another.
1073
1074 Technically we could copy them if we could set up a mapping from one
1075 sequence number to another, across the set of insns to be duplicated.
1076 This seems overly complicated and error-prone since interblock motion
1077 from sched-ebb could move one of the pair of insns to a different block.
1078
1079 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1080 then they'll be in a different block from their ldgp. Which could lead
1081 the bb reorder code to think that it would be ok to copy just the block
1082 containing the call and branch to the block containing the ldgp. */
1083
1084 static bool
1085 alpha_cannot_copy_insn_p (rtx insn)
1086 {
1087 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1088 return false;
1089 if (recog_memoized (insn) >= 0)
1090 return get_attr_cannot_copy (insn);
1091 else
1092 return false;
1093 }
1094
1095
1096 /* Try a machine-dependent way of reloading an illegitimate address
1097 operand. If we find one, push the reload and return the new rtx. */
1098
1099 rtx
1100 alpha_legitimize_reload_address (rtx x,
1101 enum machine_mode mode ATTRIBUTE_UNUSED,
1102 int opnum, int type,
1103 int ind_levels ATTRIBUTE_UNUSED)
1104 {
1105 /* We must recognize output that we have already generated ourselves. */
1106 if (GET_CODE (x) == PLUS
1107 && GET_CODE (XEXP (x, 0)) == PLUS
1108 && REG_P (XEXP (XEXP (x, 0), 0))
1109 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1110 && CONST_INT_P (XEXP (x, 1)))
1111 {
1112 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1113 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1114 opnum, (enum reload_type) type);
1115 return x;
1116 }
1117
1118 /* We wish to handle large displacements off a base register by
1119 splitting the addend across an ldah and the mem insn. This
1120 cuts number of extra insns needed from 3 to 1. */
1121 if (GET_CODE (x) == PLUS
1122 && REG_P (XEXP (x, 0))
1123 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1124 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1125 && GET_CODE (XEXP (x, 1)) == CONST_INT)
1126 {
1127 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1128 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1129 HOST_WIDE_INT high
1130 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1131
1132 /* Check for 32-bit overflow. */
1133 if (high + low != val)
1134 return NULL_RTX;
1135
1136 /* Reload the high part into a base reg; leave the low part
1137 in the mem directly. */
1138 x = gen_rtx_PLUS (GET_MODE (x),
1139 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1140 GEN_INT (high)),
1141 GEN_INT (low));
1142
1143 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1144 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1145 opnum, (enum reload_type) type);
1146 return x;
1147 }
1148
1149 return NULL_RTX;
1150 }
1151 \f
1152 /* Compute a (partial) cost for rtx X. Return true if the complete
1153 cost has been computed, and false if subexpressions should be
1154 scanned. In either case, *TOTAL contains the cost result. */
1155
1156 static bool
1157 alpha_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
1158 bool speed)
1159 {
1160 enum machine_mode mode = GET_MODE (x);
1161 bool float_mode_p = FLOAT_MODE_P (mode);
1162 const struct alpha_rtx_cost_data *cost_data;
1163
1164 if (!speed)
1165 cost_data = &alpha_rtx_cost_size;
1166 else
1167 cost_data = &alpha_rtx_cost_data[alpha_tune];
1168
1169 switch (code)
1170 {
1171 case CONST_INT:
1172 /* If this is an 8-bit constant, return zero since it can be used
1173 nearly anywhere with no cost. If it is a valid operand for an
1174 ADD or AND, likewise return 0 if we know it will be used in that
1175 context. Otherwise, return 2 since it might be used there later.
1176 All other constants take at least two insns. */
1177 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1178 {
1179 *total = 0;
1180 return true;
1181 }
1182 /* FALLTHRU */
1183
1184 case CONST_DOUBLE:
1185 if (x == CONST0_RTX (mode))
1186 *total = 0;
1187 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1188 || (outer_code == AND && and_operand (x, VOIDmode)))
1189 *total = 0;
1190 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1191 *total = 2;
1192 else
1193 *total = COSTS_N_INSNS (2);
1194 return true;
1195
1196 case CONST:
1197 case SYMBOL_REF:
1198 case LABEL_REF:
1199 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1200 *total = COSTS_N_INSNS (outer_code != MEM);
1201 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1202 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1203 else if (tls_symbolic_operand_type (x))
1204 /* Estimate of cost for call_pal rduniq. */
1205 /* ??? How many insns do we emit here? More than one... */
1206 *total = COSTS_N_INSNS (15);
1207 else
1208 /* Otherwise we do a load from the GOT. */
1209 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1210 return true;
1211
1212 case HIGH:
1213 /* This is effectively an add_operand. */
1214 *total = 2;
1215 return true;
1216
1217 case PLUS:
1218 case MINUS:
1219 if (float_mode_p)
1220 *total = cost_data->fp_add;
1221 else if (GET_CODE (XEXP (x, 0)) == MULT
1222 && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1223 {
1224 *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
1225 (enum rtx_code) outer_code, opno, speed)
1226 + rtx_cost (XEXP (x, 1),
1227 (enum rtx_code) outer_code, opno, speed)
1228 + COSTS_N_INSNS (1));
1229 return true;
1230 }
1231 return false;
1232
1233 case MULT:
1234 if (float_mode_p)
1235 *total = cost_data->fp_mult;
1236 else if (mode == DImode)
1237 *total = cost_data->int_mult_di;
1238 else
1239 *total = cost_data->int_mult_si;
1240 return false;
1241
1242 case ASHIFT:
1243 if (CONST_INT_P (XEXP (x, 1))
1244 && INTVAL (XEXP (x, 1)) <= 3)
1245 {
1246 *total = COSTS_N_INSNS (1);
1247 return false;
1248 }
1249 /* FALLTHRU */
1250
1251 case ASHIFTRT:
1252 case LSHIFTRT:
1253 *total = cost_data->int_shift;
1254 return false;
1255
1256 case IF_THEN_ELSE:
1257 if (float_mode_p)
1258 *total = cost_data->fp_add;
1259 else
1260 *total = cost_data->int_cmov;
1261 return false;
1262
1263 case DIV:
1264 case UDIV:
1265 case MOD:
1266 case UMOD:
1267 if (!float_mode_p)
1268 *total = cost_data->int_div;
1269 else if (mode == SFmode)
1270 *total = cost_data->fp_div_sf;
1271 else
1272 *total = cost_data->fp_div_df;
1273 return false;
1274
1275 case MEM:
1276 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1277 return true;
1278
1279 case NEG:
1280 if (! float_mode_p)
1281 {
1282 *total = COSTS_N_INSNS (1);
1283 return false;
1284 }
1285 /* FALLTHRU */
1286
1287 case ABS:
1288 if (! float_mode_p)
1289 {
1290 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1291 return false;
1292 }
1293 /* FALLTHRU */
1294
1295 case FLOAT:
1296 case UNSIGNED_FLOAT:
1297 case FIX:
1298 case UNSIGNED_FIX:
1299 case FLOAT_TRUNCATE:
1300 *total = cost_data->fp_add;
1301 return false;
1302
1303 case FLOAT_EXTEND:
1304 if (MEM_P (XEXP (x, 0)))
1305 *total = 0;
1306 else
1307 *total = cost_data->fp_add;
1308 return false;
1309
1310 default:
1311 return false;
1312 }
1313 }
1314 \f
1315 /* REF is an alignable memory location. Place an aligned SImode
1316 reference into *PALIGNED_MEM and the number of bits to shift into
1317 *PBITNUM. SCRATCH is a free register for use in reloading out
1318 of range stack slots. */
1319
1320 void
1321 get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1322 {
1323 rtx base;
1324 HOST_WIDE_INT disp, offset;
1325
1326 gcc_assert (MEM_P (ref));
1327
1328 if (reload_in_progress
1329 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1330 {
1331 base = find_replacement (&XEXP (ref, 0));
1332 gcc_assert (memory_address_p (GET_MODE (ref), base));
1333 }
1334 else
1335 base = XEXP (ref, 0);
1336
1337 if (GET_CODE (base) == PLUS)
1338 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1339 else
1340 disp = 0;
1341
1342 /* Find the byte offset within an aligned word. If the memory itself is
1343 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1344 will have examined the base register and determined it is aligned, and
1345 thus displacements from it are naturally alignable. */
1346 if (MEM_ALIGN (ref) >= 32)
1347 offset = 0;
1348 else
1349 offset = disp & 3;
1350
1351 /* The location should not cross aligned word boundary. */
1352 gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1353 <= GET_MODE_SIZE (SImode));
1354
1355 /* Access the entire aligned word. */
1356 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1357
1358 /* Convert the byte offset within the word to a bit offset. */
1359 offset *= BITS_PER_UNIT;
1360 *pbitnum = GEN_INT (offset);
1361 }
1362
1363 /* Similar, but just get the address. Handle the two reload cases.
1364 Add EXTRA_OFFSET to the address we return. */
1365
1366 rtx
1367 get_unaligned_address (rtx ref)
1368 {
1369 rtx base;
1370 HOST_WIDE_INT offset = 0;
1371
1372 gcc_assert (MEM_P (ref));
1373
1374 if (reload_in_progress
1375 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1376 {
1377 base = find_replacement (&XEXP (ref, 0));
1378
1379 gcc_assert (memory_address_p (GET_MODE (ref), base));
1380 }
1381 else
1382 base = XEXP (ref, 0);
1383
1384 if (GET_CODE (base) == PLUS)
1385 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1386
1387 return plus_constant (base, offset);
1388 }
1389
1390 /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1391 X is always returned in a register. */
1392
1393 rtx
1394 get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1395 {
1396 if (GET_CODE (addr) == PLUS)
1397 {
1398 ofs += INTVAL (XEXP (addr, 1));
1399 addr = XEXP (addr, 0);
1400 }
1401
1402 return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1403 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1404 }
1405
1406 /* On the Alpha, all (non-symbolic) constants except zero go into
1407 a floating-point register via memory. Note that we cannot
1408 return anything that is not a subset of RCLASS, and that some
1409 symbolic constants cannot be dropped to memory. */
1410
1411 enum reg_class
1412 alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1413 {
1414 /* Zero is present in any register class. */
1415 if (x == CONST0_RTX (GET_MODE (x)))
1416 return rclass;
1417
1418 /* These sorts of constants we can easily drop to memory. */
1419 if (CONST_INT_P (x)
1420 || GET_CODE (x) == CONST_DOUBLE
1421 || GET_CODE (x) == CONST_VECTOR)
1422 {
1423 if (rclass == FLOAT_REGS)
1424 return NO_REGS;
1425 if (rclass == ALL_REGS)
1426 return GENERAL_REGS;
1427 return rclass;
1428 }
1429
1430 /* All other kinds of constants should not (and in the case of HIGH
1431 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1432 secondary reload. */
1433 if (CONSTANT_P (x))
1434 return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1435
1436 return rclass;
1437 }
1438
1439 /* Inform reload about cases where moving X with a mode MODE to a register in
1440 RCLASS requires an extra scratch or immediate register. Return the class
1441 needed for the immediate register. */
1442
1443 static reg_class_t
1444 alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1445 enum machine_mode mode, secondary_reload_info *sri)
1446 {
1447 enum reg_class rclass = (enum reg_class) rclass_i;
1448
1449 /* Loading and storing HImode or QImode values to and from memory
1450 usually requires a scratch register. */
1451 if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1452 {
1453 if (any_memory_operand (x, mode))
1454 {
1455 if (in_p)
1456 {
1457 if (!aligned_memory_operand (x, mode))
1458 sri->icode = direct_optab_handler (reload_in_optab, mode);
1459 }
1460 else
1461 sri->icode = direct_optab_handler (reload_out_optab, mode);
1462 return NO_REGS;
1463 }
1464 }
1465
1466 /* We also cannot do integral arithmetic into FP regs, as might result
1467 from register elimination into a DImode fp register. */
1468 if (rclass == FLOAT_REGS)
1469 {
1470 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1471 return GENERAL_REGS;
1472 if (in_p && INTEGRAL_MODE_P (mode)
1473 && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1474 return GENERAL_REGS;
1475 }
1476
1477 return NO_REGS;
1478 }
1479 \f
1480 /* Subfunction of the following function. Update the flags of any MEM
1481 found in part of X. */
1482
1483 static int
1484 alpha_set_memflags_1 (rtx *xp, void *data)
1485 {
1486 rtx x = *xp, orig = (rtx) data;
1487
1488 if (!MEM_P (x))
1489 return 0;
1490
1491 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig);
1492 MEM_IN_STRUCT_P (x) = MEM_IN_STRUCT_P (orig);
1493 MEM_SCALAR_P (x) = MEM_SCALAR_P (orig);
1494 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig);
1495 MEM_READONLY_P (x) = MEM_READONLY_P (orig);
1496
1497 /* Sadly, we cannot use alias sets because the extra aliasing
1498 produced by the AND interferes. Given that two-byte quantities
1499 are the only thing we would be able to differentiate anyway,
1500 there does not seem to be any point in convoluting the early
1501 out of the alias check. */
1502
1503 return -1;
1504 }
1505
1506 /* Given SEQ, which is an INSN list, look for any MEMs in either
1507 a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
1508 volatile flags from REF into each of the MEMs found. If REF is not
1509 a MEM, don't do anything. */
1510
1511 void
1512 alpha_set_memflags (rtx seq, rtx ref)
1513 {
1514 rtx insn;
1515
1516 if (!MEM_P (ref))
1517 return;
1518
1519 /* This is only called from alpha.md, after having had something
1520 generated from one of the insn patterns. So if everything is
1521 zero, the pattern is already up-to-date. */
1522 if (!MEM_VOLATILE_P (ref)
1523 && !MEM_IN_STRUCT_P (ref)
1524 && !MEM_SCALAR_P (ref)
1525 && !MEM_NOTRAP_P (ref)
1526 && !MEM_READONLY_P (ref))
1527 return;
1528
1529 for (insn = seq; insn; insn = NEXT_INSN (insn))
1530 if (INSN_P (insn))
1531 for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref);
1532 else
1533 gcc_unreachable ();
1534 }
1535 \f
1536 static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT,
1537 int, bool);
1538
1539 /* Internal routine for alpha_emit_set_const to check for N or below insns.
1540 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1541 and return pc_rtx if successful. */
1542
1543 static rtx
1544 alpha_emit_set_const_1 (rtx target, enum machine_mode mode,
1545 HOST_WIDE_INT c, int n, bool no_output)
1546 {
1547 HOST_WIDE_INT new_const;
1548 int i, bits;
1549 /* Use a pseudo if highly optimizing and still generating RTL. */
1550 rtx subtarget
1551 = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1552 rtx temp, insn;
1553
1554 /* If this is a sign-extended 32-bit constant, we can do this in at most
1555 three insns, so do it if we have enough insns left. We always have
1556 a sign-extended 32-bit constant when compiling on a narrow machine. */
1557
1558 if (HOST_BITS_PER_WIDE_INT != 64
1559 || c >> 31 == -1 || c >> 31 == 0)
1560 {
1561 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1562 HOST_WIDE_INT tmp1 = c - low;
1563 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1564 HOST_WIDE_INT extra = 0;
1565
1566 /* If HIGH will be interpreted as negative but the constant is
1567 positive, we must adjust it to do two ldha insns. */
1568
1569 if ((high & 0x8000) != 0 && c >= 0)
1570 {
1571 extra = 0x4000;
1572 tmp1 -= 0x40000000;
1573 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1574 }
1575
1576 if (c == low || (low == 0 && extra == 0))
1577 {
1578 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1579 but that meant that we can't handle INT_MIN on 32-bit machines
1580 (like NT/Alpha), because we recurse indefinitely through
1581 emit_move_insn to gen_movdi. So instead, since we know exactly
1582 what we want, create it explicitly. */
1583
1584 if (no_output)
1585 return pc_rtx;
1586 if (target == NULL)
1587 target = gen_reg_rtx (mode);
1588 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1589 return target;
1590 }
1591 else if (n >= 2 + (extra != 0))
1592 {
1593 if (no_output)
1594 return pc_rtx;
1595 if (!can_create_pseudo_p ())
1596 {
1597 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
1598 temp = target;
1599 }
1600 else
1601 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1602 subtarget, mode);
1603
1604 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1605 This means that if we go through expand_binop, we'll try to
1606 generate extensions, etc, which will require new pseudos, which
1607 will fail during some split phases. The SImode add patterns
1608 still exist, but are not named. So build the insns by hand. */
1609
1610 if (extra != 0)
1611 {
1612 if (! subtarget)
1613 subtarget = gen_reg_rtx (mode);
1614 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1615 insn = gen_rtx_SET (VOIDmode, subtarget, insn);
1616 emit_insn (insn);
1617 temp = subtarget;
1618 }
1619
1620 if (target == NULL)
1621 target = gen_reg_rtx (mode);
1622 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1623 insn = gen_rtx_SET (VOIDmode, target, insn);
1624 emit_insn (insn);
1625 return target;
1626 }
1627 }
1628
1629 /* If we couldn't do it that way, try some other methods. But if we have
1630 no instructions left, don't bother. Likewise, if this is SImode and
1631 we can't make pseudos, we can't do anything since the expand_binop
1632 and expand_unop calls will widen and try to make pseudos. */
1633
1634 if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1635 return 0;
1636
1637 /* Next, see if we can load a related constant and then shift and possibly
1638 negate it to get the constant we want. Try this once each increasing
1639 numbers of insns. */
1640
1641 for (i = 1; i < n; i++)
1642 {
1643 /* First, see if minus some low bits, we've an easy load of
1644 high bits. */
1645
1646 new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1647 if (new_const != 0)
1648 {
1649 temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1650 if (temp)
1651 {
1652 if (no_output)
1653 return temp;
1654 return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1655 target, 0, OPTAB_WIDEN);
1656 }
1657 }
1658
1659 /* Next try complementing. */
1660 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1661 if (temp)
1662 {
1663 if (no_output)
1664 return temp;
1665 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1666 }
1667
1668 /* Next try to form a constant and do a left shift. We can do this
1669 if some low-order bits are zero; the exact_log2 call below tells
1670 us that information. The bits we are shifting out could be any
1671 value, but here we'll just try the 0- and sign-extended forms of
1672 the constant. To try to increase the chance of having the same
1673 constant in more than one insn, start at the highest number of
1674 bits to shift, but try all possibilities in case a ZAPNOT will
1675 be useful. */
1676
1677 bits = exact_log2 (c & -c);
1678 if (bits > 0)
1679 for (; bits > 0; bits--)
1680 {
1681 new_const = c >> bits;
1682 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1683 if (!temp && c < 0)
1684 {
1685 new_const = (unsigned HOST_WIDE_INT)c >> bits;
1686 temp = alpha_emit_set_const (subtarget, mode, new_const,
1687 i, no_output);
1688 }
1689 if (temp)
1690 {
1691 if (no_output)
1692 return temp;
1693 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1694 target, 0, OPTAB_WIDEN);
1695 }
1696 }
1697
1698 /* Now try high-order zero bits. Here we try the shifted-in bits as
1699 all zero and all ones. Be careful to avoid shifting outside the
1700 mode and to avoid shifting outside the host wide int size. */
1701 /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1702 confuse the recursive call and set all of the high 32 bits. */
1703
1704 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1705 - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
1706 if (bits > 0)
1707 for (; bits > 0; bits--)
1708 {
1709 new_const = c << bits;
1710 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1711 if (!temp)
1712 {
1713 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1714 temp = alpha_emit_set_const (subtarget, mode, new_const,
1715 i, no_output);
1716 }
1717 if (temp)
1718 {
1719 if (no_output)
1720 return temp;
1721 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1722 target, 1, OPTAB_WIDEN);
1723 }
1724 }
1725
1726 /* Now try high-order 1 bits. We get that with a sign-extension.
1727 But one bit isn't enough here. Be careful to avoid shifting outside
1728 the mode and to avoid shifting outside the host wide int size. */
1729
1730 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1731 - floor_log2 (~ c) - 2);
1732 if (bits > 0)
1733 for (; bits > 0; bits--)
1734 {
1735 new_const = c << bits;
1736 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1737 if (!temp)
1738 {
1739 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1740 temp = alpha_emit_set_const (subtarget, mode, new_const,
1741 i, no_output);
1742 }
1743 if (temp)
1744 {
1745 if (no_output)
1746 return temp;
1747 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1748 target, 0, OPTAB_WIDEN);
1749 }
1750 }
1751 }
1752
1753 #if HOST_BITS_PER_WIDE_INT == 64
1754 /* Finally, see if can load a value into the target that is the same as the
1755 constant except that all bytes that are 0 are changed to be 0xff. If we
1756 can, then we can do a ZAPNOT to obtain the desired constant. */
1757
1758 new_const = c;
1759 for (i = 0; i < 64; i += 8)
1760 if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1761 new_const |= (HOST_WIDE_INT) 0xff << i;
1762
1763 /* We are only called for SImode and DImode. If this is SImode, ensure that
1764 we are sign extended to a full word. */
1765
1766 if (mode == SImode)
1767 new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1768
1769 if (new_const != c)
1770 {
1771 temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1772 if (temp)
1773 {
1774 if (no_output)
1775 return temp;
1776 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1777 target, 0, OPTAB_WIDEN);
1778 }
1779 }
1780 #endif
1781
1782 return 0;
1783 }
1784
1785 /* Try to output insns to set TARGET equal to the constant C if it can be
1786 done in less than N insns. Do all computations in MODE. Returns the place
1787 where the output has been placed if it can be done and the insns have been
1788 emitted. If it would take more than N insns, zero is returned and no
1789 insns and emitted. */
1790
1791 static rtx
1792 alpha_emit_set_const (rtx target, enum machine_mode mode,
1793 HOST_WIDE_INT c, int n, bool no_output)
1794 {
1795 enum machine_mode orig_mode = mode;
1796 rtx orig_target = target;
1797 rtx result = 0;
1798 int i;
1799
1800 /* If we can't make any pseudos, TARGET is an SImode hard register, we
1801 can't load this constant in one insn, do this in DImode. */
1802 if (!can_create_pseudo_p () && mode == SImode
1803 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
1804 {
1805 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
1806 if (result)
1807 return result;
1808
1809 target = no_output ? NULL : gen_lowpart (DImode, target);
1810 mode = DImode;
1811 }
1812 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
1813 {
1814 target = no_output ? NULL : gen_lowpart (DImode, target);
1815 mode = DImode;
1816 }
1817
1818 /* Try 1 insn, then 2, then up to N. */
1819 for (i = 1; i <= n; i++)
1820 {
1821 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
1822 if (result)
1823 {
1824 rtx insn, set;
1825
1826 if (no_output)
1827 return result;
1828
1829 insn = get_last_insn ();
1830 set = single_set (insn);
1831 if (! CONSTANT_P (SET_SRC (set)))
1832 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
1833 break;
1834 }
1835 }
1836
1837 /* Allow for the case where we changed the mode of TARGET. */
1838 if (result)
1839 {
1840 if (result == target)
1841 result = orig_target;
1842 else if (mode != orig_mode)
1843 result = gen_lowpart (orig_mode, result);
1844 }
1845
1846 return result;
1847 }
1848
1849 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
1850 fall back to a straight forward decomposition. We do this to avoid
1851 exponential run times encountered when looking for longer sequences
1852 with alpha_emit_set_const. */
1853
1854 static rtx
1855 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
1856 {
1857 HOST_WIDE_INT d1, d2, d3, d4;
1858
1859 /* Decompose the entire word */
1860 #if HOST_BITS_PER_WIDE_INT >= 64
1861 gcc_assert (c2 == -(c1 < 0));
1862 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1863 c1 -= d1;
1864 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1865 c1 = (c1 - d2) >> 32;
1866 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1867 c1 -= d3;
1868 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1869 gcc_assert (c1 == d4);
1870 #else
1871 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1872 c1 -= d1;
1873 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1874 gcc_assert (c1 == d2);
1875 c2 += (d2 < 0);
1876 d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
1877 c2 -= d3;
1878 d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1879 gcc_assert (c2 == d4);
1880 #endif
1881
1882 /* Construct the high word */
1883 if (d4)
1884 {
1885 emit_move_insn (target, GEN_INT (d4));
1886 if (d3)
1887 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
1888 }
1889 else
1890 emit_move_insn (target, GEN_INT (d3));
1891
1892 /* Shift it into place */
1893 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
1894
1895 /* Add in the low bits. */
1896 if (d2)
1897 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
1898 if (d1)
1899 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
1900
1901 return target;
1902 }
1903
1904 /* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, return
1905 the low 64 bits. */
1906
1907 static void
1908 alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1)
1909 {
1910 HOST_WIDE_INT i0, i1;
1911
1912 if (GET_CODE (x) == CONST_VECTOR)
1913 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
1914
1915
1916 if (CONST_INT_P (x))
1917 {
1918 i0 = INTVAL (x);
1919 i1 = -(i0 < 0);
1920 }
1921 else if (HOST_BITS_PER_WIDE_INT >= 64)
1922 {
1923 i0 = CONST_DOUBLE_LOW (x);
1924 i1 = -(i0 < 0);
1925 }
1926 else
1927 {
1928 i0 = CONST_DOUBLE_LOW (x);
1929 i1 = CONST_DOUBLE_HIGH (x);
1930 }
1931
1932 *p0 = i0;
1933 *p1 = i1;
1934 }
1935
1936 /* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which
1937 we are willing to load the value into a register via a move pattern.
1938 Normally this is all symbolic constants, integral constants that
1939 take three or fewer instructions, and floating-point zero. */
1940
1941 bool
1942 alpha_legitimate_constant_p (enum machine_mode mode, rtx x)
1943 {
1944 HOST_WIDE_INT i0, i1;
1945
1946 switch (GET_CODE (x))
1947 {
1948 case LABEL_REF:
1949 case HIGH:
1950 return true;
1951
1952 case CONST:
1953 if (GET_CODE (XEXP (x, 0)) == PLUS
1954 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
1955 x = XEXP (XEXP (x, 0), 0);
1956 else
1957 return true;
1958
1959 if (GET_CODE (x) != SYMBOL_REF)
1960 return true;
1961
1962 /* FALLTHRU */
1963
1964 case SYMBOL_REF:
1965 /* TLS symbols are never valid. */
1966 return SYMBOL_REF_TLS_MODEL (x) == 0;
1967
1968 case CONST_DOUBLE:
1969 if (x == CONST0_RTX (mode))
1970 return true;
1971 if (FLOAT_MODE_P (mode))
1972 return false;
1973 goto do_integer;
1974
1975 case CONST_VECTOR:
1976 if (x == CONST0_RTX (mode))
1977 return true;
1978 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
1979 return false;
1980 if (GET_MODE_SIZE (mode) != 8)
1981 return false;
1982 goto do_integer;
1983
1984 case CONST_INT:
1985 do_integer:
1986 if (TARGET_BUILD_CONSTANTS)
1987 return true;
1988 alpha_extract_integer (x, &i0, &i1);
1989 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == (-i0 < 0))
1990 return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL;
1991 return false;
1992
1993 default:
1994 return false;
1995 }
1996 }
1997
1998 /* Operand 1 is known to be a constant, and should require more than one
1999 instruction to load. Emit that multi-part load. */
2000
2001 bool
2002 alpha_split_const_mov (enum machine_mode mode, rtx *operands)
2003 {
2004 HOST_WIDE_INT i0, i1;
2005 rtx temp = NULL_RTX;
2006
2007 alpha_extract_integer (operands[1], &i0, &i1);
2008
2009 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2010 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2011
2012 if (!temp && TARGET_BUILD_CONSTANTS)
2013 temp = alpha_emit_set_long_const (operands[0], i0, i1);
2014
2015 if (temp)
2016 {
2017 if (!rtx_equal_p (operands[0], temp))
2018 emit_move_insn (operands[0], temp);
2019 return true;
2020 }
2021
2022 return false;
2023 }
2024
2025 /* Expand a move instruction; return true if all work is done.
2026 We don't handle non-bwx subword loads here. */
2027
2028 bool
2029 alpha_expand_mov (enum machine_mode mode, rtx *operands)
2030 {
2031 rtx tmp;
2032
2033 /* If the output is not a register, the input must be. */
2034 if (MEM_P (operands[0])
2035 && ! reg_or_0_operand (operands[1], mode))
2036 operands[1] = force_reg (mode, operands[1]);
2037
2038 /* Allow legitimize_address to perform some simplifications. */
2039 if (mode == Pmode && symbolic_operand (operands[1], mode))
2040 {
2041 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2042 if (tmp)
2043 {
2044 if (tmp == operands[0])
2045 return true;
2046 operands[1] = tmp;
2047 return false;
2048 }
2049 }
2050
2051 /* Early out for non-constants and valid constants. */
2052 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2053 return false;
2054
2055 /* Split large integers. */
2056 if (CONST_INT_P (operands[1])
2057 || GET_CODE (operands[1]) == CONST_DOUBLE
2058 || GET_CODE (operands[1]) == CONST_VECTOR)
2059 {
2060 if (alpha_split_const_mov (mode, operands))
2061 return true;
2062 }
2063
2064 /* Otherwise we've nothing left but to drop the thing to memory. */
2065 tmp = force_const_mem (mode, operands[1]);
2066
2067 if (tmp == NULL_RTX)
2068 return false;
2069
2070 if (reload_in_progress)
2071 {
2072 emit_move_insn (operands[0], XEXP (tmp, 0));
2073 operands[1] = replace_equiv_address (tmp, operands[0]);
2074 }
2075 else
2076 operands[1] = validize_mem (tmp);
2077 return false;
2078 }
2079
2080 /* Expand a non-bwx QImode or HImode move instruction;
2081 return true if all work is done. */
2082
2083 bool
2084 alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands)
2085 {
2086 rtx seq;
2087
2088 /* If the output is not a register, the input must be. */
2089 if (MEM_P (operands[0]))
2090 operands[1] = force_reg (mode, operands[1]);
2091
2092 /* Handle four memory cases, unaligned and aligned for either the input
2093 or the output. The only case where we can be called during reload is
2094 for aligned loads; all other cases require temporaries. */
2095
2096 if (any_memory_operand (operands[1], mode))
2097 {
2098 if (aligned_memory_operand (operands[1], mode))
2099 {
2100 if (reload_in_progress)
2101 {
2102 if (mode == QImode)
2103 seq = gen_reload_inqi_aligned (operands[0], operands[1]);
2104 else
2105 seq = gen_reload_inhi_aligned (operands[0], operands[1]);
2106 emit_insn (seq);
2107 }
2108 else
2109 {
2110 rtx aligned_mem, bitnum;
2111 rtx scratch = gen_reg_rtx (SImode);
2112 rtx subtarget;
2113 bool copyout;
2114
2115 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2116
2117 subtarget = operands[0];
2118 if (REG_P (subtarget))
2119 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2120 else
2121 subtarget = gen_reg_rtx (DImode), copyout = true;
2122
2123 if (mode == QImode)
2124 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2125 bitnum, scratch);
2126 else
2127 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2128 bitnum, scratch);
2129 emit_insn (seq);
2130
2131 if (copyout)
2132 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2133 }
2134 }
2135 else
2136 {
2137 /* Don't pass these as parameters since that makes the generated
2138 code depend on parameter evaluation order which will cause
2139 bootstrap failures. */
2140
2141 rtx temp1, temp2, subtarget, ua;
2142 bool copyout;
2143
2144 temp1 = gen_reg_rtx (DImode);
2145 temp2 = gen_reg_rtx (DImode);
2146
2147 subtarget = operands[0];
2148 if (REG_P (subtarget))
2149 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2150 else
2151 subtarget = gen_reg_rtx (DImode), copyout = true;
2152
2153 ua = get_unaligned_address (operands[1]);
2154 if (mode == QImode)
2155 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2156 else
2157 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2158
2159 alpha_set_memflags (seq, operands[1]);
2160 emit_insn (seq);
2161
2162 if (copyout)
2163 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2164 }
2165 return true;
2166 }
2167
2168 if (any_memory_operand (operands[0], mode))
2169 {
2170 if (aligned_memory_operand (operands[0], mode))
2171 {
2172 rtx aligned_mem, bitnum;
2173 rtx temp1 = gen_reg_rtx (SImode);
2174 rtx temp2 = gen_reg_rtx (SImode);
2175
2176 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2177
2178 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2179 temp1, temp2));
2180 }
2181 else
2182 {
2183 rtx temp1 = gen_reg_rtx (DImode);
2184 rtx temp2 = gen_reg_rtx (DImode);
2185 rtx temp3 = gen_reg_rtx (DImode);
2186 rtx ua = get_unaligned_address (operands[0]);
2187
2188 if (mode == QImode)
2189 seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2190 else
2191 seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2192
2193 alpha_set_memflags (seq, operands[0]);
2194 emit_insn (seq);
2195 }
2196 return true;
2197 }
2198
2199 return false;
2200 }
2201
2202 /* Implement the movmisalign patterns. One of the operands is a memory
2203 that is not naturally aligned. Emit instructions to load it. */
2204
2205 void
2206 alpha_expand_movmisalign (enum machine_mode mode, rtx *operands)
2207 {
2208 /* Honor misaligned loads, for those we promised to do so. */
2209 if (MEM_P (operands[1]))
2210 {
2211 rtx tmp;
2212
2213 if (register_operand (operands[0], mode))
2214 tmp = operands[0];
2215 else
2216 tmp = gen_reg_rtx (mode);
2217
2218 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2219 if (tmp != operands[0])
2220 emit_move_insn (operands[0], tmp);
2221 }
2222 else if (MEM_P (operands[0]))
2223 {
2224 if (!reg_or_0_operand (operands[1], mode))
2225 operands[1] = force_reg (mode, operands[1]);
2226 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2227 }
2228 else
2229 gcc_unreachable ();
2230 }
2231
2232 /* Generate an unsigned DImode to FP conversion. This is the same code
2233 optabs would emit if we didn't have TFmode patterns.
2234
2235 For SFmode, this is the only construction I've found that can pass
2236 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2237 intermediates will work, because you'll get intermediate rounding
2238 that ruins the end result. Some of this could be fixed by turning
2239 on round-to-positive-infinity, but that requires diddling the fpsr,
2240 which kills performance. I tried turning this around and converting
2241 to a negative number, so that I could turn on /m, but either I did
2242 it wrong or there's something else cause I wound up with the exact
2243 same single-bit error. There is a branch-less form of this same code:
2244
2245 srl $16,1,$1
2246 and $16,1,$2
2247 cmplt $16,0,$3
2248 or $1,$2,$2
2249 cmovge $16,$16,$2
2250 itoft $3,$f10
2251 itoft $2,$f11
2252 cvtqs $f11,$f11
2253 adds $f11,$f11,$f0
2254 fcmoveq $f10,$f11,$f0
2255
2256 I'm not using it because it's the same number of instructions as
2257 this branch-full form, and it has more serialized long latency
2258 instructions on the critical path.
2259
2260 For DFmode, we can avoid rounding errors by breaking up the word
2261 into two pieces, converting them separately, and adding them back:
2262
2263 LC0: .long 0,0x5f800000
2264
2265 itoft $16,$f11
2266 lda $2,LC0
2267 cmplt $16,0,$1
2268 cpyse $f11,$f31,$f10
2269 cpyse $f31,$f11,$f11
2270 s4addq $1,$2,$1
2271 lds $f12,0($1)
2272 cvtqt $f10,$f10
2273 cvtqt $f11,$f11
2274 addt $f12,$f10,$f0
2275 addt $f0,$f11,$f0
2276
2277 This doesn't seem to be a clear-cut win over the optabs form.
2278 It probably all depends on the distribution of numbers being
2279 converted -- in the optabs form, all but high-bit-set has a
2280 much lower minimum execution time. */
2281
2282 void
2283 alpha_emit_floatuns (rtx operands[2])
2284 {
2285 rtx neglab, donelab, i0, i1, f0, in, out;
2286 enum machine_mode mode;
2287
2288 out = operands[0];
2289 in = force_reg (DImode, operands[1]);
2290 mode = GET_MODE (out);
2291 neglab = gen_label_rtx ();
2292 donelab = gen_label_rtx ();
2293 i0 = gen_reg_rtx (DImode);
2294 i1 = gen_reg_rtx (DImode);
2295 f0 = gen_reg_rtx (mode);
2296
2297 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2298
2299 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
2300 emit_jump_insn (gen_jump (donelab));
2301 emit_barrier ();
2302
2303 emit_label (neglab);
2304
2305 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2306 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2307 emit_insn (gen_iordi3 (i0, i0, i1));
2308 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
2309 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
2310
2311 emit_label (donelab);
2312 }
2313
2314 /* Generate the comparison for a conditional branch. */
2315
2316 void
2317 alpha_emit_conditional_branch (rtx operands[], enum machine_mode cmp_mode)
2318 {
2319 enum rtx_code cmp_code, branch_code;
2320 enum machine_mode branch_mode = VOIDmode;
2321 enum rtx_code code = GET_CODE (operands[0]);
2322 rtx op0 = operands[1], op1 = operands[2];
2323 rtx tem;
2324
2325 if (cmp_mode == TFmode)
2326 {
2327 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2328 op1 = const0_rtx;
2329 cmp_mode = DImode;
2330 }
2331
2332 /* The general case: fold the comparison code to the types of compares
2333 that we have, choosing the branch as necessary. */
2334 switch (code)
2335 {
2336 case EQ: case LE: case LT: case LEU: case LTU:
2337 case UNORDERED:
2338 /* We have these compares: */
2339 cmp_code = code, branch_code = NE;
2340 break;
2341
2342 case NE:
2343 case ORDERED:
2344 /* These must be reversed. */
2345 cmp_code = reverse_condition (code), branch_code = EQ;
2346 break;
2347
2348 case GE: case GT: case GEU: case GTU:
2349 /* For FP, we swap them, for INT, we reverse them. */
2350 if (cmp_mode == DFmode)
2351 {
2352 cmp_code = swap_condition (code);
2353 branch_code = NE;
2354 tem = op0, op0 = op1, op1 = tem;
2355 }
2356 else
2357 {
2358 cmp_code = reverse_condition (code);
2359 branch_code = EQ;
2360 }
2361 break;
2362
2363 default:
2364 gcc_unreachable ();
2365 }
2366
2367 if (cmp_mode == DFmode)
2368 {
2369 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2370 {
2371 /* When we are not as concerned about non-finite values, and we
2372 are comparing against zero, we can branch directly. */
2373 if (op1 == CONST0_RTX (DFmode))
2374 cmp_code = UNKNOWN, branch_code = code;
2375 else if (op0 == CONST0_RTX (DFmode))
2376 {
2377 /* Undo the swap we probably did just above. */
2378 tem = op0, op0 = op1, op1 = tem;
2379 branch_code = swap_condition (cmp_code);
2380 cmp_code = UNKNOWN;
2381 }
2382 }
2383 else
2384 {
2385 /* ??? We mark the branch mode to be CCmode to prevent the
2386 compare and branch from being combined, since the compare
2387 insn follows IEEE rules that the branch does not. */
2388 branch_mode = CCmode;
2389 }
2390 }
2391 else
2392 {
2393 /* The following optimizations are only for signed compares. */
2394 if (code != LEU && code != LTU && code != GEU && code != GTU)
2395 {
2396 /* Whee. Compare and branch against 0 directly. */
2397 if (op1 == const0_rtx)
2398 cmp_code = UNKNOWN, branch_code = code;
2399
2400 /* If the constants doesn't fit into an immediate, but can
2401 be generated by lda/ldah, we adjust the argument and
2402 compare against zero, so we can use beq/bne directly. */
2403 /* ??? Don't do this when comparing against symbols, otherwise
2404 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2405 be declared false out of hand (at least for non-weak). */
2406 else if (CONST_INT_P (op1)
2407 && (code == EQ || code == NE)
2408 && !(symbolic_operand (op0, VOIDmode)
2409 || (REG_P (op0) && REG_POINTER (op0))))
2410 {
2411 rtx n_op1 = GEN_INT (-INTVAL (op1));
2412
2413 if (! satisfies_constraint_I (op1)
2414 && (satisfies_constraint_K (n_op1)
2415 || satisfies_constraint_L (n_op1)))
2416 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2417 }
2418 }
2419
2420 if (!reg_or_0_operand (op0, DImode))
2421 op0 = force_reg (DImode, op0);
2422 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2423 op1 = force_reg (DImode, op1);
2424 }
2425
2426 /* Emit an initial compare instruction, if necessary. */
2427 tem = op0;
2428 if (cmp_code != UNKNOWN)
2429 {
2430 tem = gen_reg_rtx (cmp_mode);
2431 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2432 }
2433
2434 /* Emit the branch instruction. */
2435 tem = gen_rtx_SET (VOIDmode, pc_rtx,
2436 gen_rtx_IF_THEN_ELSE (VOIDmode,
2437 gen_rtx_fmt_ee (branch_code,
2438 branch_mode, tem,
2439 CONST0_RTX (cmp_mode)),
2440 gen_rtx_LABEL_REF (VOIDmode,
2441 operands[3]),
2442 pc_rtx));
2443 emit_jump_insn (tem);
2444 }
2445
2446 /* Certain simplifications can be done to make invalid setcc operations
2447 valid. Return the final comparison, or NULL if we can't work. */
2448
2449 bool
2450 alpha_emit_setcc (rtx operands[], enum machine_mode cmp_mode)
2451 {
2452 enum rtx_code cmp_code;
2453 enum rtx_code code = GET_CODE (operands[1]);
2454 rtx op0 = operands[2], op1 = operands[3];
2455 rtx tmp;
2456
2457 if (cmp_mode == TFmode)
2458 {
2459 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2460 op1 = const0_rtx;
2461 cmp_mode = DImode;
2462 }
2463
2464 if (cmp_mode == DFmode && !TARGET_FIX)
2465 return 0;
2466
2467 /* The general case: fold the comparison code to the types of compares
2468 that we have, choosing the branch as necessary. */
2469
2470 cmp_code = UNKNOWN;
2471 switch (code)
2472 {
2473 case EQ: case LE: case LT: case LEU: case LTU:
2474 case UNORDERED:
2475 /* We have these compares. */
2476 if (cmp_mode == DFmode)
2477 cmp_code = code, code = NE;
2478 break;
2479
2480 case NE:
2481 if (cmp_mode == DImode && op1 == const0_rtx)
2482 break;
2483 /* FALLTHRU */
2484
2485 case ORDERED:
2486 cmp_code = reverse_condition (code);
2487 code = EQ;
2488 break;
2489
2490 case GE: case GT: case GEU: case GTU:
2491 /* These normally need swapping, but for integer zero we have
2492 special patterns that recognize swapped operands. */
2493 if (cmp_mode == DImode && op1 == const0_rtx)
2494 break;
2495 code = swap_condition (code);
2496 if (cmp_mode == DFmode)
2497 cmp_code = code, code = NE;
2498 tmp = op0, op0 = op1, op1 = tmp;
2499 break;
2500
2501 default:
2502 gcc_unreachable ();
2503 }
2504
2505 if (cmp_mode == DImode)
2506 {
2507 if (!register_operand (op0, DImode))
2508 op0 = force_reg (DImode, op0);
2509 if (!reg_or_8bit_operand (op1, DImode))
2510 op1 = force_reg (DImode, op1);
2511 }
2512
2513 /* Emit an initial compare instruction, if necessary. */
2514 if (cmp_code != UNKNOWN)
2515 {
2516 tmp = gen_reg_rtx (cmp_mode);
2517 emit_insn (gen_rtx_SET (VOIDmode, tmp,
2518 gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)));
2519
2520 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2521 op1 = const0_rtx;
2522 }
2523
2524 /* Emit the setcc instruction. */
2525 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2526 gen_rtx_fmt_ee (code, DImode, op0, op1)));
2527 return true;
2528 }
2529
2530
2531 /* Rewrite a comparison against zero CMP of the form
2532 (CODE (cc0) (const_int 0)) so it can be written validly in
2533 a conditional move (if_then_else CMP ...).
2534 If both of the operands that set cc0 are nonzero we must emit
2535 an insn to perform the compare (it can't be done within
2536 the conditional move). */
2537
2538 rtx
2539 alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
2540 {
2541 enum rtx_code code = GET_CODE (cmp);
2542 enum rtx_code cmov_code = NE;
2543 rtx op0 = XEXP (cmp, 0);
2544 rtx op1 = XEXP (cmp, 1);
2545 enum machine_mode cmp_mode
2546 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2547 enum machine_mode cmov_mode = VOIDmode;
2548 int local_fast_math = flag_unsafe_math_optimizations;
2549 rtx tem;
2550
2551 if (cmp_mode == TFmode)
2552 {
2553 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2554 op1 = const0_rtx;
2555 cmp_mode = DImode;
2556 }
2557
2558 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2559
2560 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2561 {
2562 enum rtx_code cmp_code;
2563
2564 if (! TARGET_FIX)
2565 return 0;
2566
2567 /* If we have fp<->int register move instructions, do a cmov by
2568 performing the comparison in fp registers, and move the
2569 zero/nonzero value to integer registers, where we can then
2570 use a normal cmov, or vice-versa. */
2571
2572 switch (code)
2573 {
2574 case EQ: case LE: case LT: case LEU: case LTU:
2575 /* We have these compares. */
2576 cmp_code = code, code = NE;
2577 break;
2578
2579 case NE:
2580 /* This must be reversed. */
2581 cmp_code = EQ, code = EQ;
2582 break;
2583
2584 case GE: case GT: case GEU: case GTU:
2585 /* These normally need swapping, but for integer zero we have
2586 special patterns that recognize swapped operands. */
2587 if (cmp_mode == DImode && op1 == const0_rtx)
2588 cmp_code = code, code = NE;
2589 else
2590 {
2591 cmp_code = swap_condition (code);
2592 code = NE;
2593 tem = op0, op0 = op1, op1 = tem;
2594 }
2595 break;
2596
2597 default:
2598 gcc_unreachable ();
2599 }
2600
2601 tem = gen_reg_rtx (cmp_mode);
2602 emit_insn (gen_rtx_SET (VOIDmode, tem,
2603 gen_rtx_fmt_ee (cmp_code, cmp_mode,
2604 op0, op1)));
2605
2606 cmp_mode = cmp_mode == DImode ? DFmode : DImode;
2607 op0 = gen_lowpart (cmp_mode, tem);
2608 op1 = CONST0_RTX (cmp_mode);
2609 local_fast_math = 1;
2610 }
2611
2612 /* We may be able to use a conditional move directly.
2613 This avoids emitting spurious compares. */
2614 if (signed_comparison_operator (cmp, VOIDmode)
2615 && (cmp_mode == DImode || local_fast_math)
2616 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2617 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2618
2619 /* We can't put the comparison inside the conditional move;
2620 emit a compare instruction and put that inside the
2621 conditional move. Make sure we emit only comparisons we have;
2622 swap or reverse as necessary. */
2623
2624 if (!can_create_pseudo_p ())
2625 return NULL_RTX;
2626
2627 switch (code)
2628 {
2629 case EQ: case LE: case LT: case LEU: case LTU:
2630 /* We have these compares: */
2631 break;
2632
2633 case NE:
2634 /* This must be reversed. */
2635 code = reverse_condition (code);
2636 cmov_code = EQ;
2637 break;
2638
2639 case GE: case GT: case GEU: case GTU:
2640 /* These must be swapped. */
2641 if (op1 != CONST0_RTX (cmp_mode))
2642 {
2643 code = swap_condition (code);
2644 tem = op0, op0 = op1, op1 = tem;
2645 }
2646 break;
2647
2648 default:
2649 gcc_unreachable ();
2650 }
2651
2652 if (cmp_mode == DImode)
2653 {
2654 if (!reg_or_0_operand (op0, DImode))
2655 op0 = force_reg (DImode, op0);
2656 if (!reg_or_8bit_operand (op1, DImode))
2657 op1 = force_reg (DImode, op1);
2658 }
2659
2660 /* ??? We mark the branch mode to be CCmode to prevent the compare
2661 and cmov from being combined, since the compare insn follows IEEE
2662 rules that the cmov does not. */
2663 if (cmp_mode == DFmode && !local_fast_math)
2664 cmov_mode = CCmode;
2665
2666 tem = gen_reg_rtx (cmp_mode);
2667 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2668 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2669 }
2670
2671 /* Simplify a conditional move of two constants into a setcc with
2672 arithmetic. This is done with a splitter since combine would
2673 just undo the work if done during code generation. It also catches
2674 cases we wouldn't have before cse. */
2675
2676 int
2677 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2678 rtx t_rtx, rtx f_rtx)
2679 {
2680 HOST_WIDE_INT t, f, diff;
2681 enum machine_mode mode;
2682 rtx target, subtarget, tmp;
2683
2684 mode = GET_MODE (dest);
2685 t = INTVAL (t_rtx);
2686 f = INTVAL (f_rtx);
2687 diff = t - f;
2688
2689 if (((code == NE || code == EQ) && diff < 0)
2690 || (code == GE || code == GT))
2691 {
2692 code = reverse_condition (code);
2693 diff = t, t = f, f = diff;
2694 diff = t - f;
2695 }
2696
2697 subtarget = target = dest;
2698 if (mode != DImode)
2699 {
2700 target = gen_lowpart (DImode, dest);
2701 if (can_create_pseudo_p ())
2702 subtarget = gen_reg_rtx (DImode);
2703 else
2704 subtarget = target;
2705 }
2706 /* Below, we must be careful to use copy_rtx on target and subtarget
2707 in intermediate insns, as they may be a subreg rtx, which may not
2708 be shared. */
2709
2710 if (f == 0 && exact_log2 (diff) > 0
2711 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2712 viable over a longer latency cmove. On EV5, the E0 slot is a
2713 scarce resource, and on EV4 shift has the same latency as a cmove. */
2714 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2715 {
2716 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2717 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2718
2719 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2720 GEN_INT (exact_log2 (t)));
2721 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2722 }
2723 else if (f == 0 && t == -1)
2724 {
2725 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2726 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2727
2728 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2729 }
2730 else if (diff == 1 || diff == 4 || diff == 8)
2731 {
2732 rtx add_op;
2733
2734 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2735 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2736
2737 if (diff == 1)
2738 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2739 else
2740 {
2741 add_op = GEN_INT (f);
2742 if (sext_add_operand (add_op, mode))
2743 {
2744 tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2745 GEN_INT (diff));
2746 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2747 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2748 }
2749 else
2750 return 0;
2751 }
2752 }
2753 else
2754 return 0;
2755
2756 return 1;
2757 }
2758 \f
2759 /* Look up the function X_floating library function name for the
2760 given operation. */
2761
2762 struct GTY(()) xfloating_op
2763 {
2764 const enum rtx_code code;
2765 const char *const GTY((skip)) osf_func;
2766 const char *const GTY((skip)) vms_func;
2767 rtx libcall;
2768 };
2769
2770 static GTY(()) struct xfloating_op xfloating_ops[] =
2771 {
2772 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2773 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2774 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2775 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2776 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2777 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2778 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2779 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2780 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2781 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2782 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2783 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2784 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2785 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2786 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2787 };
2788
2789 static GTY(()) struct xfloating_op vax_cvt_ops[] =
2790 {
2791 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
2792 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
2793 };
2794
2795 static rtx
2796 alpha_lookup_xfloating_lib_func (enum rtx_code code)
2797 {
2798 struct xfloating_op *ops = xfloating_ops;
2799 long n = ARRAY_SIZE (xfloating_ops);
2800 long i;
2801
2802 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
2803
2804 /* How irritating. Nothing to key off for the main table. */
2805 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
2806 {
2807 ops = vax_cvt_ops;
2808 n = ARRAY_SIZE (vax_cvt_ops);
2809 }
2810
2811 for (i = 0; i < n; ++i, ++ops)
2812 if (ops->code == code)
2813 {
2814 rtx func = ops->libcall;
2815 if (!func)
2816 {
2817 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
2818 ? ops->vms_func : ops->osf_func);
2819 ops->libcall = func;
2820 }
2821 return func;
2822 }
2823
2824 gcc_unreachable ();
2825 }
2826
2827 /* Most X_floating operations take the rounding mode as an argument.
2828 Compute that here. */
2829
2830 static int
2831 alpha_compute_xfloating_mode_arg (enum rtx_code code,
2832 enum alpha_fp_rounding_mode round)
2833 {
2834 int mode;
2835
2836 switch (round)
2837 {
2838 case ALPHA_FPRM_NORM:
2839 mode = 2;
2840 break;
2841 case ALPHA_FPRM_MINF:
2842 mode = 1;
2843 break;
2844 case ALPHA_FPRM_CHOP:
2845 mode = 0;
2846 break;
2847 case ALPHA_FPRM_DYN:
2848 mode = 4;
2849 break;
2850 default:
2851 gcc_unreachable ();
2852
2853 /* XXX For reference, round to +inf is mode = 3. */
2854 }
2855
2856 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
2857 mode |= 0x10000;
2858
2859 return mode;
2860 }
2861
2862 /* Emit an X_floating library function call.
2863
2864 Note that these functions do not follow normal calling conventions:
2865 TFmode arguments are passed in two integer registers (as opposed to
2866 indirect); TFmode return values appear in R16+R17.
2867
2868 FUNC is the function to call.
2869 TARGET is where the output belongs.
2870 OPERANDS are the inputs.
2871 NOPERANDS is the count of inputs.
2872 EQUIV is the expression equivalent for the function.
2873 */
2874
2875 static void
2876 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
2877 int noperands, rtx equiv)
2878 {
2879 rtx usage = NULL_RTX, tmp, reg;
2880 int regno = 16, i;
2881
2882 start_sequence ();
2883
2884 for (i = 0; i < noperands; ++i)
2885 {
2886 switch (GET_MODE (operands[i]))
2887 {
2888 case TFmode:
2889 reg = gen_rtx_REG (TFmode, regno);
2890 regno += 2;
2891 break;
2892
2893 case DFmode:
2894 reg = gen_rtx_REG (DFmode, regno + 32);
2895 regno += 1;
2896 break;
2897
2898 case VOIDmode:
2899 gcc_assert (CONST_INT_P (operands[i]));
2900 /* FALLTHRU */
2901 case DImode:
2902 reg = gen_rtx_REG (DImode, regno);
2903 regno += 1;
2904 break;
2905
2906 default:
2907 gcc_unreachable ();
2908 }
2909
2910 emit_move_insn (reg, operands[i]);
2911 use_reg (&usage, reg);
2912 }
2913
2914 switch (GET_MODE (target))
2915 {
2916 case TFmode:
2917 reg = gen_rtx_REG (TFmode, 16);
2918 break;
2919 case DFmode:
2920 reg = gen_rtx_REG (DFmode, 32);
2921 break;
2922 case DImode:
2923 reg = gen_rtx_REG (DImode, 0);
2924 break;
2925 default:
2926 gcc_unreachable ();
2927 }
2928
2929 tmp = gen_rtx_MEM (QImode, func);
2930 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
2931 const0_rtx, const0_rtx));
2932 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
2933 RTL_CONST_CALL_P (tmp) = 1;
2934
2935 tmp = get_insns ();
2936 end_sequence ();
2937
2938 emit_libcall_block (tmp, target, reg, equiv);
2939 }
2940
2941 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
2942
2943 void
2944 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
2945 {
2946 rtx func;
2947 int mode;
2948 rtx out_operands[3];
2949
2950 func = alpha_lookup_xfloating_lib_func (code);
2951 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
2952
2953 out_operands[0] = operands[1];
2954 out_operands[1] = operands[2];
2955 out_operands[2] = GEN_INT (mode);
2956 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
2957 gen_rtx_fmt_ee (code, TFmode, operands[1],
2958 operands[2]));
2959 }
2960
2961 /* Emit an X_floating library function call for a comparison. */
2962
2963 static rtx
2964 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
2965 {
2966 enum rtx_code cmp_code, res_code;
2967 rtx func, out, operands[2], note;
2968
2969 /* X_floating library comparison functions return
2970 -1 unordered
2971 0 false
2972 1 true
2973 Convert the compare against the raw return value. */
2974
2975 cmp_code = *pcode;
2976 switch (cmp_code)
2977 {
2978 case UNORDERED:
2979 cmp_code = EQ;
2980 res_code = LT;
2981 break;
2982 case ORDERED:
2983 cmp_code = EQ;
2984 res_code = GE;
2985 break;
2986 case NE:
2987 res_code = NE;
2988 break;
2989 case EQ:
2990 case LT:
2991 case GT:
2992 case LE:
2993 case GE:
2994 res_code = GT;
2995 break;
2996 default:
2997 gcc_unreachable ();
2998 }
2999 *pcode = res_code;
3000
3001 func = alpha_lookup_xfloating_lib_func (cmp_code);
3002
3003 operands[0] = op0;
3004 operands[1] = op1;
3005 out = gen_reg_rtx (DImode);
3006
3007 /* What's actually returned is -1,0,1, not a proper boolean value,
3008 so use an EXPR_LIST as with a generic libcall instead of a
3009 comparison type expression. */
3010 note = gen_rtx_EXPR_LIST (VOIDmode, op1, NULL_RTX);
3011 note = gen_rtx_EXPR_LIST (VOIDmode, op0, note);
3012 note = gen_rtx_EXPR_LIST (VOIDmode, func, note);
3013 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3014
3015 return out;
3016 }
3017
3018 /* Emit an X_floating library function call for a conversion. */
3019
3020 void
3021 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3022 {
3023 int noperands = 1, mode;
3024 rtx out_operands[2];
3025 rtx func;
3026 enum rtx_code code = orig_code;
3027
3028 if (code == UNSIGNED_FIX)
3029 code = FIX;
3030
3031 func = alpha_lookup_xfloating_lib_func (code);
3032
3033 out_operands[0] = operands[1];
3034
3035 switch (code)
3036 {
3037 case FIX:
3038 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3039 out_operands[1] = GEN_INT (mode);
3040 noperands = 2;
3041 break;
3042 case FLOAT_TRUNCATE:
3043 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3044 out_operands[1] = GEN_INT (mode);
3045 noperands = 2;
3046 break;
3047 default:
3048 break;
3049 }
3050
3051 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3052 gen_rtx_fmt_e (orig_code,
3053 GET_MODE (operands[0]),
3054 operands[1]));
3055 }
3056
3057 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3058 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3059 guarantee that the sequence
3060 set (OP[0] OP[2])
3061 set (OP[1] OP[3])
3062 is valid. Naturally, output operand ordering is little-endian.
3063 This is used by *movtf_internal and *movti_internal. */
3064
3065 void
3066 alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode,
3067 bool fixup_overlap)
3068 {
3069 switch (GET_CODE (operands[1]))
3070 {
3071 case REG:
3072 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3073 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3074 break;
3075
3076 case MEM:
3077 operands[3] = adjust_address (operands[1], DImode, 8);
3078 operands[2] = adjust_address (operands[1], DImode, 0);
3079 break;
3080
3081 case CONST_INT:
3082 case CONST_DOUBLE:
3083 gcc_assert (operands[1] == CONST0_RTX (mode));
3084 operands[2] = operands[3] = const0_rtx;
3085 break;
3086
3087 default:
3088 gcc_unreachable ();
3089 }
3090
3091 switch (GET_CODE (operands[0]))
3092 {
3093 case REG:
3094 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3095 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3096 break;
3097
3098 case MEM:
3099 operands[1] = adjust_address (operands[0], DImode, 8);
3100 operands[0] = adjust_address (operands[0], DImode, 0);
3101 break;
3102
3103 default:
3104 gcc_unreachable ();
3105 }
3106
3107 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3108 {
3109 rtx tmp;
3110 tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
3111 tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
3112 }
3113 }
3114
3115 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3116 op2 is a register containing the sign bit, operation is the
3117 logical operation to be performed. */
3118
3119 void
3120 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3121 {
3122 rtx high_bit = operands[2];
3123 rtx scratch;
3124 int move;
3125
3126 alpha_split_tmode_pair (operands, TFmode, false);
3127
3128 /* Detect three flavors of operand overlap. */
3129 move = 1;
3130 if (rtx_equal_p (operands[0], operands[2]))
3131 move = 0;
3132 else if (rtx_equal_p (operands[1], operands[2]))
3133 {
3134 if (rtx_equal_p (operands[0], high_bit))
3135 move = 2;
3136 else
3137 move = -1;
3138 }
3139
3140 if (move < 0)
3141 emit_move_insn (operands[0], operands[2]);
3142
3143 /* ??? If the destination overlaps both source tf and high_bit, then
3144 assume source tf is dead in its entirety and use the other half
3145 for a scratch register. Otherwise "scratch" is just the proper
3146 destination register. */
3147 scratch = operands[move < 2 ? 1 : 3];
3148
3149 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3150
3151 if (move > 0)
3152 {
3153 emit_move_insn (operands[0], operands[2]);
3154 if (move > 1)
3155 emit_move_insn (operands[1], scratch);
3156 }
3157 }
3158 \f
3159 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3160 unaligned data:
3161
3162 unsigned: signed:
3163 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3164 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3165 lda r3,X(r11) lda r3,X+2(r11)
3166 extwl r1,r3,r1 extql r1,r3,r1
3167 extwh r2,r3,r2 extqh r2,r3,r2
3168 or r1.r2.r1 or r1,r2,r1
3169 sra r1,48,r1
3170
3171 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3172 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3173 lda r3,X(r11) lda r3,X(r11)
3174 extll r1,r3,r1 extll r1,r3,r1
3175 extlh r2,r3,r2 extlh r2,r3,r2
3176 or r1.r2.r1 addl r1,r2,r1
3177
3178 quad: ldq_u r1,X(r11)
3179 ldq_u r2,X+7(r11)
3180 lda r3,X(r11)
3181 extql r1,r3,r1
3182 extqh r2,r3,r2
3183 or r1.r2.r1
3184 */
3185
3186 void
3187 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3188 HOST_WIDE_INT ofs, int sign)
3189 {
3190 rtx meml, memh, addr, extl, exth, tmp, mema;
3191 enum machine_mode mode;
3192
3193 if (TARGET_BWX && size == 2)
3194 {
3195 meml = adjust_address (mem, QImode, ofs);
3196 memh = adjust_address (mem, QImode, ofs+1);
3197 extl = gen_reg_rtx (DImode);
3198 exth = gen_reg_rtx (DImode);
3199 emit_insn (gen_zero_extendqidi2 (extl, meml));
3200 emit_insn (gen_zero_extendqidi2 (exth, memh));
3201 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3202 NULL, 1, OPTAB_LIB_WIDEN);
3203 addr = expand_simple_binop (DImode, IOR, extl, exth,
3204 NULL, 1, OPTAB_LIB_WIDEN);
3205
3206 if (sign && GET_MODE (tgt) != HImode)
3207 {
3208 addr = gen_lowpart (HImode, addr);
3209 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3210 }
3211 else
3212 {
3213 if (GET_MODE (tgt) != DImode)
3214 addr = gen_lowpart (GET_MODE (tgt), addr);
3215 emit_move_insn (tgt, addr);
3216 }
3217 return;
3218 }
3219
3220 meml = gen_reg_rtx (DImode);
3221 memh = gen_reg_rtx (DImode);
3222 addr = gen_reg_rtx (DImode);
3223 extl = gen_reg_rtx (DImode);
3224 exth = gen_reg_rtx (DImode);
3225
3226 mema = XEXP (mem, 0);
3227 if (GET_CODE (mema) == LO_SUM)
3228 mema = force_reg (Pmode, mema);
3229
3230 /* AND addresses cannot be in any alias set, since they may implicitly
3231 alias surrounding code. Ideally we'd have some alias set that
3232 covered all types except those with alignment 8 or higher. */
3233
3234 tmp = change_address (mem, DImode,
3235 gen_rtx_AND (DImode,
3236 plus_constant (mema, ofs),
3237 GEN_INT (-8)));
3238 set_mem_alias_set (tmp, 0);
3239 emit_move_insn (meml, tmp);
3240
3241 tmp = change_address (mem, DImode,
3242 gen_rtx_AND (DImode,
3243 plus_constant (mema, ofs + size - 1),
3244 GEN_INT (-8)));
3245 set_mem_alias_set (tmp, 0);
3246 emit_move_insn (memh, tmp);
3247
3248 if (sign && size == 2)
3249 {
3250 emit_move_insn (addr, plus_constant (mema, ofs+2));
3251
3252 emit_insn (gen_extql (extl, meml, addr));
3253 emit_insn (gen_extqh (exth, memh, addr));
3254
3255 /* We must use tgt here for the target. Alpha-vms port fails if we use
3256 addr for the target, because addr is marked as a pointer and combine
3257 knows that pointers are always sign-extended 32-bit values. */
3258 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3259 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3260 addr, 1, OPTAB_WIDEN);
3261 }
3262 else
3263 {
3264 emit_move_insn (addr, plus_constant (mema, ofs));
3265 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3266 switch ((int) size)
3267 {
3268 case 2:
3269 emit_insn (gen_extwh (exth, memh, addr));
3270 mode = HImode;
3271 break;
3272 case 4:
3273 emit_insn (gen_extlh (exth, memh, addr));
3274 mode = SImode;
3275 break;
3276 case 8:
3277 emit_insn (gen_extqh (exth, memh, addr));
3278 mode = DImode;
3279 break;
3280 default:
3281 gcc_unreachable ();
3282 }
3283
3284 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3285 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3286 sign, OPTAB_WIDEN);
3287 }
3288
3289 if (addr != tgt)
3290 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3291 }
3292
3293 /* Similarly, use ins and msk instructions to perform unaligned stores. */
3294
3295 void
3296 alpha_expand_unaligned_store (rtx dst, rtx src,
3297 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3298 {
3299 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3300
3301 if (TARGET_BWX && size == 2)
3302 {
3303 if (src != const0_rtx)
3304 {
3305 dstl = gen_lowpart (QImode, src);
3306 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3307 NULL, 1, OPTAB_LIB_WIDEN);
3308 dsth = gen_lowpart (QImode, dsth);
3309 }
3310 else
3311 dstl = dsth = const0_rtx;
3312
3313 meml = adjust_address (dst, QImode, ofs);
3314 memh = adjust_address (dst, QImode, ofs+1);
3315
3316 emit_move_insn (meml, dstl);
3317 emit_move_insn (memh, dsth);
3318 return;
3319 }
3320
3321 dstl = gen_reg_rtx (DImode);
3322 dsth = gen_reg_rtx (DImode);
3323 insl = gen_reg_rtx (DImode);
3324 insh = gen_reg_rtx (DImode);
3325
3326 dsta = XEXP (dst, 0);
3327 if (GET_CODE (dsta) == LO_SUM)
3328 dsta = force_reg (Pmode, dsta);
3329
3330 /* AND addresses cannot be in any alias set, since they may implicitly
3331 alias surrounding code. Ideally we'd have some alias set that
3332 covered all types except those with alignment 8 or higher. */
3333
3334 meml = change_address (dst, DImode,
3335 gen_rtx_AND (DImode,
3336 plus_constant (dsta, ofs),
3337 GEN_INT (-8)));
3338 set_mem_alias_set (meml, 0);
3339
3340 memh = change_address (dst, DImode,
3341 gen_rtx_AND (DImode,
3342 plus_constant (dsta, ofs + size - 1),
3343 GEN_INT (-8)));
3344 set_mem_alias_set (memh, 0);
3345
3346 emit_move_insn (dsth, memh);
3347 emit_move_insn (dstl, meml);
3348
3349 addr = copy_addr_to_reg (plus_constant (dsta, ofs));
3350
3351 if (src != CONST0_RTX (GET_MODE (src)))
3352 {
3353 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3354 GEN_INT (size*8), addr));
3355
3356 switch ((int) size)
3357 {
3358 case 2:
3359 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3360 break;
3361 case 4:
3362 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3363 break;
3364 case 8:
3365 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3366 break;
3367 default:
3368 gcc_unreachable ();
3369 }
3370 }
3371
3372 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3373
3374 switch ((int) size)
3375 {
3376 case 2:
3377 emit_insn (gen_mskwl (dstl, dstl, addr));
3378 break;
3379 case 4:
3380 emit_insn (gen_mskll (dstl, dstl, addr));
3381 break;
3382 case 8:
3383 emit_insn (gen_mskql (dstl, dstl, addr));
3384 break;
3385 default:
3386 gcc_unreachable ();
3387 }
3388
3389 if (src != CONST0_RTX (GET_MODE (src)))
3390 {
3391 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3392 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3393 }
3394
3395 /* Must store high before low for degenerate case of aligned. */
3396 emit_move_insn (memh, dsth);
3397 emit_move_insn (meml, dstl);
3398 }
3399
3400 /* The block move code tries to maximize speed by separating loads and
3401 stores at the expense of register pressure: we load all of the data
3402 before we store it back out. There are two secondary effects worth
3403 mentioning, that this speeds copying to/from aligned and unaligned
3404 buffers, and that it makes the code significantly easier to write. */
3405
3406 #define MAX_MOVE_WORDS 8
3407
3408 /* Load an integral number of consecutive unaligned quadwords. */
3409
3410 static void
3411 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3412 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3413 {
3414 rtx const im8 = GEN_INT (-8);
3415 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3416 rtx sreg, areg, tmp, smema;
3417 HOST_WIDE_INT i;
3418
3419 smema = XEXP (smem, 0);
3420 if (GET_CODE (smema) == LO_SUM)
3421 smema = force_reg (Pmode, smema);
3422
3423 /* Generate all the tmp registers we need. */
3424 for (i = 0; i < words; ++i)
3425 {
3426 data_regs[i] = out_regs[i];
3427 ext_tmps[i] = gen_reg_rtx (DImode);
3428 }
3429 data_regs[words] = gen_reg_rtx (DImode);
3430
3431 if (ofs != 0)
3432 smem = adjust_address (smem, GET_MODE (smem), ofs);
3433
3434 /* Load up all of the source data. */
3435 for (i = 0; i < words; ++i)
3436 {
3437 tmp = change_address (smem, DImode,
3438 gen_rtx_AND (DImode,
3439 plus_constant (smema, 8*i),
3440 im8));
3441 set_mem_alias_set (tmp, 0);
3442 emit_move_insn (data_regs[i], tmp);
3443 }
3444
3445 tmp = change_address (smem, DImode,
3446 gen_rtx_AND (DImode,
3447 plus_constant (smema, 8*words - 1),
3448 im8));
3449 set_mem_alias_set (tmp, 0);
3450 emit_move_insn (data_regs[words], tmp);
3451
3452 /* Extract the half-word fragments. Unfortunately DEC decided to make
3453 extxh with offset zero a noop instead of zeroing the register, so
3454 we must take care of that edge condition ourselves with cmov. */
3455
3456 sreg = copy_addr_to_reg (smema);
3457 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3458 1, OPTAB_WIDEN);
3459 for (i = 0; i < words; ++i)
3460 {
3461 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3462 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3463 emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i],
3464 gen_rtx_IF_THEN_ELSE (DImode,
3465 gen_rtx_EQ (DImode, areg,
3466 const0_rtx),
3467 const0_rtx, ext_tmps[i])));
3468 }
3469
3470 /* Merge the half-words into whole words. */
3471 for (i = 0; i < words; ++i)
3472 {
3473 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3474 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3475 }
3476 }
3477
3478 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
3479 may be NULL to store zeros. */
3480
3481 static void
3482 alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3483 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3484 {
3485 rtx const im8 = GEN_INT (-8);
3486 rtx ins_tmps[MAX_MOVE_WORDS];
3487 rtx st_tmp_1, st_tmp_2, dreg;
3488 rtx st_addr_1, st_addr_2, dmema;
3489 HOST_WIDE_INT i;
3490
3491 dmema = XEXP (dmem, 0);
3492 if (GET_CODE (dmema) == LO_SUM)
3493 dmema = force_reg (Pmode, dmema);
3494
3495 /* Generate all the tmp registers we need. */
3496 if (data_regs != NULL)
3497 for (i = 0; i < words; ++i)
3498 ins_tmps[i] = gen_reg_rtx(DImode);
3499 st_tmp_1 = gen_reg_rtx(DImode);
3500 st_tmp_2 = gen_reg_rtx(DImode);
3501
3502 if (ofs != 0)
3503 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3504
3505 st_addr_2 = change_address (dmem, DImode,
3506 gen_rtx_AND (DImode,
3507 plus_constant (dmema, words*8 - 1),
3508 im8));
3509 set_mem_alias_set (st_addr_2, 0);
3510
3511 st_addr_1 = change_address (dmem, DImode,
3512 gen_rtx_AND (DImode, dmema, im8));
3513 set_mem_alias_set (st_addr_1, 0);
3514
3515 /* Load up the destination end bits. */
3516 emit_move_insn (st_tmp_2, st_addr_2);
3517 emit_move_insn (st_tmp_1, st_addr_1);
3518
3519 /* Shift the input data into place. */
3520 dreg = copy_addr_to_reg (dmema);
3521 if (data_regs != NULL)
3522 {
3523 for (i = words-1; i >= 0; --i)
3524 {
3525 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3526 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3527 }
3528 for (i = words-1; i > 0; --i)
3529 {
3530 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3531 ins_tmps[i-1], ins_tmps[i-1], 1,
3532 OPTAB_WIDEN);
3533 }
3534 }
3535
3536 /* Split and merge the ends with the destination data. */
3537 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3538 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3539
3540 if (data_regs != NULL)
3541 {
3542 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3543 st_tmp_2, 1, OPTAB_WIDEN);
3544 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3545 st_tmp_1, 1, OPTAB_WIDEN);
3546 }
3547
3548 /* Store it all. */
3549 emit_move_insn (st_addr_2, st_tmp_2);
3550 for (i = words-1; i > 0; --i)
3551 {
3552 rtx tmp = change_address (dmem, DImode,
3553 gen_rtx_AND (DImode,
3554 plus_constant (dmema, i*8),
3555 im8));
3556 set_mem_alias_set (tmp, 0);
3557 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3558 }
3559 emit_move_insn (st_addr_1, st_tmp_1);
3560 }
3561
3562
3563 /* Expand string/block move operations.
3564
3565 operands[0] is the pointer to the destination.
3566 operands[1] is the pointer to the source.
3567 operands[2] is the number of bytes to move.
3568 operands[3] is the alignment. */
3569
3570 int
3571 alpha_expand_block_move (rtx operands[])
3572 {
3573 rtx bytes_rtx = operands[2];
3574 rtx align_rtx = operands[3];
3575 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3576 HOST_WIDE_INT bytes = orig_bytes;
3577 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3578 HOST_WIDE_INT dst_align = src_align;
3579 rtx orig_src = operands[1];
3580 rtx orig_dst = operands[0];
3581 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3582 rtx tmp;
3583 unsigned int i, words, ofs, nregs = 0;
3584
3585 if (orig_bytes <= 0)
3586 return 1;
3587 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3588 return 0;
3589
3590 /* Look for additional alignment information from recorded register info. */
3591
3592 tmp = XEXP (orig_src, 0);
3593 if (REG_P (tmp))
3594 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3595 else if (GET_CODE (tmp) == PLUS
3596 && REG_P (XEXP (tmp, 0))
3597 && CONST_INT_P (XEXP (tmp, 1)))
3598 {
3599 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3600 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3601
3602 if (a > src_align)
3603 {
3604 if (a >= 64 && c % 8 == 0)
3605 src_align = 64;
3606 else if (a >= 32 && c % 4 == 0)
3607 src_align = 32;
3608 else if (a >= 16 && c % 2 == 0)
3609 src_align = 16;
3610 }
3611 }
3612
3613 tmp = XEXP (orig_dst, 0);
3614 if (REG_P (tmp))
3615 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3616 else if (GET_CODE (tmp) == PLUS
3617 && REG_P (XEXP (tmp, 0))
3618 && CONST_INT_P (XEXP (tmp, 1)))
3619 {
3620 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3621 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3622
3623 if (a > dst_align)
3624 {
3625 if (a >= 64 && c % 8 == 0)
3626 dst_align = 64;
3627 else if (a >= 32 && c % 4 == 0)
3628 dst_align = 32;
3629 else if (a >= 16 && c % 2 == 0)
3630 dst_align = 16;
3631 }
3632 }
3633
3634 ofs = 0;
3635 if (src_align >= 64 && bytes >= 8)
3636 {
3637 words = bytes / 8;
3638
3639 for (i = 0; i < words; ++i)
3640 data_regs[nregs + i] = gen_reg_rtx (DImode);
3641
3642 for (i = 0; i < words; ++i)
3643 emit_move_insn (data_regs[nregs + i],
3644 adjust_address (orig_src, DImode, ofs + i * 8));
3645
3646 nregs += words;
3647 bytes -= words * 8;
3648 ofs += words * 8;
3649 }
3650
3651 if (src_align >= 32 && bytes >= 4)
3652 {
3653 words = bytes / 4;
3654
3655 for (i = 0; i < words; ++i)
3656 data_regs[nregs + i] = gen_reg_rtx (SImode);
3657
3658 for (i = 0; i < words; ++i)
3659 emit_move_insn (data_regs[nregs + i],
3660 adjust_address (orig_src, SImode, ofs + i * 4));
3661
3662 nregs += words;
3663 bytes -= words * 4;
3664 ofs += words * 4;
3665 }
3666
3667 if (bytes >= 8)
3668 {
3669 words = bytes / 8;
3670
3671 for (i = 0; i < words+1; ++i)
3672 data_regs[nregs + i] = gen_reg_rtx (DImode);
3673
3674 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3675 words, ofs);
3676
3677 nregs += words;
3678 bytes -= words * 8;
3679 ofs += words * 8;
3680 }
3681
3682 if (! TARGET_BWX && bytes >= 4)
3683 {
3684 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3685 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3686 bytes -= 4;
3687 ofs += 4;
3688 }
3689
3690 if (bytes >= 2)
3691 {
3692 if (src_align >= 16)
3693 {
3694 do {
3695 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3696 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3697 bytes -= 2;
3698 ofs += 2;
3699 } while (bytes >= 2);
3700 }
3701 else if (! TARGET_BWX)
3702 {
3703 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3704 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3705 bytes -= 2;
3706 ofs += 2;
3707 }
3708 }
3709
3710 while (bytes > 0)
3711 {
3712 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3713 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3714 bytes -= 1;
3715 ofs += 1;
3716 }
3717
3718 gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3719
3720 /* Now save it back out again. */
3721
3722 i = 0, ofs = 0;
3723
3724 /* Write out the data in whatever chunks reading the source allowed. */
3725 if (dst_align >= 64)
3726 {
3727 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3728 {
3729 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3730 data_regs[i]);
3731 ofs += 8;
3732 i++;
3733 }
3734 }
3735
3736 if (dst_align >= 32)
3737 {
3738 /* If the source has remaining DImode regs, write them out in
3739 two pieces. */
3740 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3741 {
3742 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3743 NULL_RTX, 1, OPTAB_WIDEN);
3744
3745 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3746 gen_lowpart (SImode, data_regs[i]));
3747 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3748 gen_lowpart (SImode, tmp));
3749 ofs += 8;
3750 i++;
3751 }
3752
3753 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3754 {
3755 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3756 data_regs[i]);
3757 ofs += 4;
3758 i++;
3759 }
3760 }
3761
3762 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3763 {
3764 /* Write out a remaining block of words using unaligned methods. */
3765
3766 for (words = 1; i + words < nregs; words++)
3767 if (GET_MODE (data_regs[i + words]) != DImode)
3768 break;
3769
3770 if (words == 1)
3771 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3772 else
3773 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3774 words, ofs);
3775
3776 i += words;
3777 ofs += words * 8;
3778 }
3779
3780 /* Due to the above, this won't be aligned. */
3781 /* ??? If we have more than one of these, consider constructing full
3782 words in registers and using alpha_expand_unaligned_store_words. */
3783 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3784 {
3785 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3786 ofs += 4;
3787 i++;
3788 }
3789
3790 if (dst_align >= 16)
3791 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3792 {
3793 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
3794 i++;
3795 ofs += 2;
3796 }
3797 else
3798 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3799 {
3800 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
3801 i++;
3802 ofs += 2;
3803 }
3804
3805 /* The remainder must be byte copies. */
3806 while (i < nregs)
3807 {
3808 gcc_assert (GET_MODE (data_regs[i]) == QImode);
3809 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
3810 i++;
3811 ofs += 1;
3812 }
3813
3814 return 1;
3815 }
3816
3817 int
3818 alpha_expand_block_clear (rtx operands[])
3819 {
3820 rtx bytes_rtx = operands[1];
3821 rtx align_rtx = operands[3];
3822 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3823 HOST_WIDE_INT bytes = orig_bytes;
3824 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
3825 HOST_WIDE_INT alignofs = 0;
3826 rtx orig_dst = operands[0];
3827 rtx tmp;
3828 int i, words, ofs = 0;
3829
3830 if (orig_bytes <= 0)
3831 return 1;
3832 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3833 return 0;
3834
3835 /* Look for stricter alignment. */
3836 tmp = XEXP (orig_dst, 0);
3837 if (REG_P (tmp))
3838 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3839 else if (GET_CODE (tmp) == PLUS
3840 && REG_P (XEXP (tmp, 0))
3841 && CONST_INT_P (XEXP (tmp, 1)))
3842 {
3843 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3844 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3845
3846 if (a > align)
3847 {
3848 if (a >= 64)
3849 align = a, alignofs = 8 - c % 8;
3850 else if (a >= 32)
3851 align = a, alignofs = 4 - c % 4;
3852 else if (a >= 16)
3853 align = a, alignofs = 2 - c % 2;
3854 }
3855 }
3856
3857 /* Handle an unaligned prefix first. */
3858
3859 if (alignofs > 0)
3860 {
3861 #if HOST_BITS_PER_WIDE_INT >= 64
3862 /* Given that alignofs is bounded by align, the only time BWX could
3863 generate three stores is for a 7 byte fill. Prefer two individual
3864 stores over a load/mask/store sequence. */
3865 if ((!TARGET_BWX || alignofs == 7)
3866 && align >= 32
3867 && !(alignofs == 4 && bytes >= 4))
3868 {
3869 enum machine_mode mode = (align >= 64 ? DImode : SImode);
3870 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
3871 rtx mem, tmp;
3872 HOST_WIDE_INT mask;
3873
3874 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
3875 set_mem_alias_set (mem, 0);
3876
3877 mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
3878 if (bytes < alignofs)
3879 {
3880 mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
3881 ofs += bytes;
3882 bytes = 0;
3883 }
3884 else
3885 {
3886 bytes -= alignofs;
3887 ofs += alignofs;
3888 }
3889 alignofs = 0;
3890
3891 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
3892 NULL_RTX, 1, OPTAB_WIDEN);
3893
3894 emit_move_insn (mem, tmp);
3895 }
3896 #endif
3897
3898 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
3899 {
3900 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
3901 bytes -= 1;
3902 ofs += 1;
3903 alignofs -= 1;
3904 }
3905 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
3906 {
3907 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
3908 bytes -= 2;
3909 ofs += 2;
3910 alignofs -= 2;
3911 }
3912 if (alignofs == 4 && bytes >= 4)
3913 {
3914 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
3915 bytes -= 4;
3916 ofs += 4;
3917 alignofs = 0;
3918 }
3919
3920 /* If we've not used the extra lead alignment information by now,
3921 we won't be able to. Downgrade align to match what's left over. */
3922 if (alignofs > 0)
3923 {
3924 alignofs = alignofs & -alignofs;
3925 align = MIN (align, alignofs * BITS_PER_UNIT);
3926 }
3927 }
3928
3929 /* Handle a block of contiguous long-words. */
3930
3931 if (align >= 64 && bytes >= 8)
3932 {
3933 words = bytes / 8;
3934
3935 for (i = 0; i < words; ++i)
3936 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
3937 const0_rtx);
3938
3939 bytes -= words * 8;
3940 ofs += words * 8;
3941 }
3942
3943 /* If the block is large and appropriately aligned, emit a single
3944 store followed by a sequence of stq_u insns. */
3945
3946 if (align >= 32 && bytes > 16)
3947 {
3948 rtx orig_dsta;
3949
3950 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
3951 bytes -= 4;
3952 ofs += 4;
3953
3954 orig_dsta = XEXP (orig_dst, 0);
3955 if (GET_CODE (orig_dsta) == LO_SUM)
3956 orig_dsta = force_reg (Pmode, orig_dsta);
3957
3958 words = bytes / 8;
3959 for (i = 0; i < words; ++i)
3960 {
3961 rtx mem
3962 = change_address (orig_dst, DImode,
3963 gen_rtx_AND (DImode,
3964 plus_constant (orig_dsta, ofs + i*8),
3965 GEN_INT (-8)));
3966 set_mem_alias_set (mem, 0);
3967 emit_move_insn (mem, const0_rtx);
3968 }
3969
3970 /* Depending on the alignment, the first stq_u may have overlapped
3971 with the initial stl, which means that the last stq_u didn't
3972 write as much as it would appear. Leave those questionable bytes
3973 unaccounted for. */
3974 bytes -= words * 8 - 4;
3975 ofs += words * 8 - 4;
3976 }
3977
3978 /* Handle a smaller block of aligned words. */
3979
3980 if ((align >= 64 && bytes == 4)
3981 || (align == 32 && bytes >= 4))
3982 {
3983 words = bytes / 4;
3984
3985 for (i = 0; i < words; ++i)
3986 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
3987 const0_rtx);
3988
3989 bytes -= words * 4;
3990 ofs += words * 4;
3991 }
3992
3993 /* An unaligned block uses stq_u stores for as many as possible. */
3994
3995 if (bytes >= 8)
3996 {
3997 words = bytes / 8;
3998
3999 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4000
4001 bytes -= words * 8;
4002 ofs += words * 8;
4003 }
4004
4005 /* Next clean up any trailing pieces. */
4006
4007 #if HOST_BITS_PER_WIDE_INT >= 64
4008 /* Count the number of bits in BYTES for which aligned stores could
4009 be emitted. */
4010 words = 0;
4011 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4012 if (bytes & i)
4013 words += 1;
4014
4015 /* If we have appropriate alignment (and it wouldn't take too many
4016 instructions otherwise), mask out the bytes we need. */
4017 if (TARGET_BWX ? words > 2 : bytes > 0)
4018 {
4019 if (align >= 64)
4020 {
4021 rtx mem, tmp;
4022 HOST_WIDE_INT mask;
4023
4024 mem = adjust_address (orig_dst, DImode, ofs);
4025 set_mem_alias_set (mem, 0);
4026
4027 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4028
4029 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4030 NULL_RTX, 1, OPTAB_WIDEN);
4031
4032 emit_move_insn (mem, tmp);
4033 return 1;
4034 }
4035 else if (align >= 32 && bytes < 4)
4036 {
4037 rtx mem, tmp;
4038 HOST_WIDE_INT mask;
4039
4040 mem = adjust_address (orig_dst, SImode, ofs);
4041 set_mem_alias_set (mem, 0);
4042
4043 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4044
4045 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4046 NULL_RTX, 1, OPTAB_WIDEN);
4047
4048 emit_move_insn (mem, tmp);
4049 return 1;
4050 }
4051 }
4052 #endif
4053
4054 if (!TARGET_BWX && bytes >= 4)
4055 {
4056 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4057 bytes -= 4;
4058 ofs += 4;
4059 }
4060
4061 if (bytes >= 2)
4062 {
4063 if (align >= 16)
4064 {
4065 do {
4066 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4067 const0_rtx);
4068 bytes -= 2;
4069 ofs += 2;
4070 } while (bytes >= 2);
4071 }
4072 else if (! TARGET_BWX)
4073 {
4074 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4075 bytes -= 2;
4076 ofs += 2;
4077 }
4078 }
4079
4080 while (bytes > 0)
4081 {
4082 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4083 bytes -= 1;
4084 ofs += 1;
4085 }
4086
4087 return 1;
4088 }
4089
4090 /* Returns a mask so that zap(x, value) == x & mask. */
4091
4092 rtx
4093 alpha_expand_zap_mask (HOST_WIDE_INT value)
4094 {
4095 rtx result;
4096 int i;
4097
4098 if (HOST_BITS_PER_WIDE_INT >= 64)
4099 {
4100 HOST_WIDE_INT mask = 0;
4101
4102 for (i = 7; i >= 0; --i)
4103 {
4104 mask <<= 8;
4105 if (!((value >> i) & 1))
4106 mask |= 0xff;
4107 }
4108
4109 result = gen_int_mode (mask, DImode);
4110 }
4111 else
4112 {
4113 HOST_WIDE_INT mask_lo = 0, mask_hi = 0;
4114
4115 gcc_assert (HOST_BITS_PER_WIDE_INT == 32);
4116
4117 for (i = 7; i >= 4; --i)
4118 {
4119 mask_hi <<= 8;
4120 if (!((value >> i) & 1))
4121 mask_hi |= 0xff;
4122 }
4123
4124 for (i = 3; i >= 0; --i)
4125 {
4126 mask_lo <<= 8;
4127 if (!((value >> i) & 1))
4128 mask_lo |= 0xff;
4129 }
4130
4131 result = immed_double_const (mask_lo, mask_hi, DImode);
4132 }
4133
4134 return result;
4135 }
4136
4137 void
4138 alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4139 enum machine_mode mode,
4140 rtx op0, rtx op1, rtx op2)
4141 {
4142 op0 = gen_lowpart (mode, op0);
4143
4144 if (op1 == const0_rtx)
4145 op1 = CONST0_RTX (mode);
4146 else
4147 op1 = gen_lowpart (mode, op1);
4148
4149 if (op2 == const0_rtx)
4150 op2 = CONST0_RTX (mode);
4151 else
4152 op2 = gen_lowpart (mode, op2);
4153
4154 emit_insn ((*gen) (op0, op1, op2));
4155 }
4156
4157 /* A subroutine of the atomic operation splitters. Jump to LABEL if
4158 COND is true. Mark the jump as unlikely to be taken. */
4159
4160 static void
4161 emit_unlikely_jump (rtx cond, rtx label)
4162 {
4163 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
4164 rtx x;
4165
4166 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4167 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
4168 add_reg_note (x, REG_BR_PROB, very_unlikely);
4169 }
4170
4171 /* A subroutine of the atomic operation splitters. Emit a load-locked
4172 instruction in MODE. */
4173
4174 static void
4175 emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
4176 {
4177 rtx (*fn) (rtx, rtx) = NULL;
4178 if (mode == SImode)
4179 fn = gen_load_locked_si;
4180 else if (mode == DImode)
4181 fn = gen_load_locked_di;
4182 emit_insn (fn (reg, mem));
4183 }
4184
4185 /* A subroutine of the atomic operation splitters. Emit a store-conditional
4186 instruction in MODE. */
4187
4188 static void
4189 emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
4190 {
4191 rtx (*fn) (rtx, rtx, rtx) = NULL;
4192 if (mode == SImode)
4193 fn = gen_store_conditional_si;
4194 else if (mode == DImode)
4195 fn = gen_store_conditional_di;
4196 emit_insn (fn (res, mem, val));
4197 }
4198
4199 /* Subroutines of the atomic operation splitters. Emit barriers
4200 as needed for the memory MODEL. */
4201
4202 static void
4203 alpha_pre_atomic_barrier (enum memmodel model)
4204 {
4205 switch (model)
4206 {
4207 case MEMMODEL_RELAXED:
4208 case MEMMODEL_CONSUME:
4209 case MEMMODEL_ACQUIRE:
4210 break;
4211 case MEMMODEL_RELEASE:
4212 case MEMMODEL_ACQ_REL:
4213 case MEMMODEL_SEQ_CST:
4214 emit_insn (gen_memory_barrier ());
4215 break;
4216 default:
4217 gcc_unreachable ();
4218 }
4219 }
4220
4221 static void
4222 alpha_post_atomic_barrier (enum memmodel model)
4223 {
4224 switch (model)
4225 {
4226 case MEMMODEL_RELAXED:
4227 case MEMMODEL_CONSUME:
4228 case MEMMODEL_RELEASE:
4229 break;
4230 case MEMMODEL_ACQUIRE:
4231 case MEMMODEL_ACQ_REL:
4232 case MEMMODEL_SEQ_CST:
4233 emit_insn (gen_memory_barrier ());
4234 break;
4235 default:
4236 gcc_unreachable ();
4237 }
4238 }
4239
4240 /* A subroutine of the atomic operation splitters. Emit an insxl
4241 instruction in MODE. */
4242
4243 static rtx
4244 emit_insxl (enum machine_mode mode, rtx op1, rtx op2)
4245 {
4246 rtx ret = gen_reg_rtx (DImode);
4247 rtx (*fn) (rtx, rtx, rtx);
4248
4249 switch (mode)
4250 {
4251 case QImode:
4252 fn = gen_insbl;
4253 break;
4254 case HImode:
4255 fn = gen_inswl;
4256 break;
4257 case SImode:
4258 fn = gen_insll;
4259 break;
4260 case DImode:
4261 fn = gen_insql;
4262 break;
4263 default:
4264 gcc_unreachable ();
4265 }
4266
4267 op1 = force_reg (mode, op1);
4268 emit_insn (fn (ret, op1, op2));
4269
4270 return ret;
4271 }
4272
4273 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
4274 to perform. MEM is the memory on which to operate. VAL is the second
4275 operand of the binary operator. BEFORE and AFTER are optional locations to
4276 return the value of MEM either before of after the operation. SCRATCH is
4277 a scratch register. */
4278
4279 void
4280 alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4281 rtx after, rtx scratch, enum memmodel model)
4282 {
4283 enum machine_mode mode = GET_MODE (mem);
4284 rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4285
4286 alpha_pre_atomic_barrier (model);
4287
4288 label = gen_label_rtx ();
4289 emit_label (label);
4290 label = gen_rtx_LABEL_REF (DImode, label);
4291
4292 if (before == NULL)
4293 before = scratch;
4294 emit_load_locked (mode, before, mem);
4295
4296 if (code == NOT)
4297 {
4298 x = gen_rtx_AND (mode, before, val);
4299 emit_insn (gen_rtx_SET (VOIDmode, val, x));
4300
4301 x = gen_rtx_NOT (mode, val);
4302 }
4303 else
4304 x = gen_rtx_fmt_ee (code, mode, before, val);
4305 if (after)
4306 emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x)));
4307 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
4308
4309 emit_store_conditional (mode, cond, mem, scratch);
4310
4311 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4312 emit_unlikely_jump (x, label);
4313
4314 alpha_post_atomic_barrier (model);
4315 }
4316
4317 /* Expand a compare and swap operation. */
4318
4319 void
4320 alpha_split_compare_and_swap (rtx operands[])
4321 {
4322 rtx cond, retval, mem, oldval, newval;
4323 bool is_weak;
4324 enum memmodel mod_s, mod_f;
4325 enum machine_mode mode;
4326 rtx label1, label2, x;
4327
4328 cond = operands[0];
4329 retval = operands[1];
4330 mem = operands[2];
4331 oldval = operands[3];
4332 newval = operands[4];
4333 is_weak = (operands[5] != const0_rtx);
4334 mod_s = (enum memmodel) INTVAL (operands[6]);
4335 mod_f = (enum memmodel) INTVAL (operands[7]);
4336 mode = GET_MODE (mem);
4337
4338 alpha_pre_atomic_barrier (mod_s);
4339
4340 label1 = NULL_RTX;
4341 if (!is_weak)
4342 {
4343 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4344 emit_label (XEXP (label1, 0));
4345 }
4346 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4347
4348 emit_load_locked (mode, retval, mem);
4349
4350 x = gen_lowpart (DImode, retval);
4351 if (oldval == const0_rtx)
4352 {
4353 emit_move_insn (cond, const0_rtx);
4354 x = gen_rtx_NE (DImode, x, const0_rtx);
4355 }
4356 else
4357 {
4358 x = gen_rtx_EQ (DImode, x, oldval);
4359 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4360 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4361 }
4362 emit_unlikely_jump (x, label2);
4363
4364 emit_move_insn (cond, newval);
4365 emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond));
4366
4367 if (!is_weak)
4368 {
4369 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4370 emit_unlikely_jump (x, label1);
4371 }
4372
4373 if (mod_f != MEMMODEL_RELAXED)
4374 emit_label (XEXP (label2, 0));
4375
4376 alpha_post_atomic_barrier (mod_s);
4377
4378 if (mod_f == MEMMODEL_RELAXED)
4379 emit_label (XEXP (label2, 0));
4380 }
4381
4382 void
4383 alpha_expand_compare_and_swap_12 (rtx operands[])
4384 {
4385 rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4386 enum machine_mode mode;
4387 rtx addr, align, wdst;
4388 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
4389
4390 cond = operands[0];
4391 dst = operands[1];
4392 mem = operands[2];
4393 oldval = operands[3];
4394 newval = operands[4];
4395 is_weak = operands[5];
4396 mod_s = operands[6];
4397 mod_f = operands[7];
4398 mode = GET_MODE (mem);
4399
4400 /* We forced the address into a register via mem_noofs_operand. */
4401 addr = XEXP (mem, 0);
4402 gcc_assert (register_operand (addr, DImode));
4403
4404 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4405 NULL_RTX, 1, OPTAB_DIRECT);
4406
4407 oldval = convert_modes (DImode, mode, oldval, 1);
4408
4409 if (newval != const0_rtx)
4410 newval = emit_insxl (mode, newval, addr);
4411
4412 wdst = gen_reg_rtx (DImode);
4413 if (mode == QImode)
4414 gen = gen_atomic_compare_and_swapqi_1;
4415 else
4416 gen = gen_atomic_compare_and_swaphi_1;
4417 emit_insn (gen (cond, wdst, mem, oldval, newval, align,
4418 is_weak, mod_s, mod_f));
4419
4420 emit_move_insn (dst, gen_lowpart (mode, wdst));
4421 }
4422
4423 void
4424 alpha_split_compare_and_swap_12 (rtx operands[])
4425 {
4426 rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4427 enum machine_mode mode;
4428 bool is_weak;
4429 enum memmodel mod_s, mod_f;
4430 rtx label1, label2, mem, addr, width, mask, x;
4431
4432 cond = operands[0];
4433 dest = operands[1];
4434 orig_mem = operands[2];
4435 oldval = operands[3];
4436 newval = operands[4];
4437 align = operands[5];
4438 is_weak = (operands[6] != const0_rtx);
4439 mod_s = (enum memmodel) INTVAL (operands[7]);
4440 mod_f = (enum memmodel) INTVAL (operands[8]);
4441 scratch = operands[9];
4442 mode = GET_MODE (orig_mem);
4443 addr = XEXP (orig_mem, 0);
4444
4445 mem = gen_rtx_MEM (DImode, align);
4446 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4447 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4448 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4449
4450 alpha_pre_atomic_barrier (mod_s);
4451
4452 label1 = NULL_RTX;
4453 if (!is_weak)
4454 {
4455 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4456 emit_label (XEXP (label1, 0));
4457 }
4458 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4459
4460 emit_load_locked (DImode, scratch, mem);
4461
4462 width = GEN_INT (GET_MODE_BITSIZE (mode));
4463 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4464 emit_insn (gen_extxl (dest, scratch, width, addr));
4465
4466 if (oldval == const0_rtx)
4467 {
4468 emit_move_insn (cond, const0_rtx);
4469 x = gen_rtx_NE (DImode, dest, const0_rtx);
4470 }
4471 else
4472 {
4473 x = gen_rtx_EQ (DImode, dest, oldval);
4474 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4475 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4476 }
4477 emit_unlikely_jump (x, label2);
4478
4479 emit_insn (gen_mskxl (cond, scratch, mask, addr));
4480
4481 if (newval != const0_rtx)
4482 emit_insn (gen_iordi3 (cond, cond, newval));
4483
4484 emit_store_conditional (DImode, cond, mem, cond);
4485
4486 if (!is_weak)
4487 {
4488 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4489 emit_unlikely_jump (x, label1);
4490 }
4491
4492 if (mod_f != MEMMODEL_RELAXED)
4493 emit_label (XEXP (label2, 0));
4494
4495 alpha_post_atomic_barrier (mod_s);
4496
4497 if (mod_f == MEMMODEL_RELAXED)
4498 emit_label (XEXP (label2, 0));
4499 }
4500
4501 /* Expand an atomic exchange operation. */
4502
4503 void
4504 alpha_split_atomic_exchange (rtx operands[])
4505 {
4506 rtx retval, mem, val, scratch;
4507 enum memmodel model;
4508 enum machine_mode mode;
4509 rtx label, x, cond;
4510
4511 retval = operands[0];
4512 mem = operands[1];
4513 val = operands[2];
4514 model = (enum memmodel) INTVAL (operands[3]);
4515 scratch = operands[4];
4516 mode = GET_MODE (mem);
4517 cond = gen_lowpart (DImode, scratch);
4518
4519 alpha_pre_atomic_barrier (model);
4520
4521 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4522 emit_label (XEXP (label, 0));
4523
4524 emit_load_locked (mode, retval, mem);
4525 emit_move_insn (scratch, val);
4526 emit_store_conditional (mode, cond, mem, scratch);
4527
4528 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4529 emit_unlikely_jump (x, label);
4530
4531 alpha_post_atomic_barrier (model);
4532 }
4533
4534 void
4535 alpha_expand_atomic_exchange_12 (rtx operands[])
4536 {
4537 rtx dst, mem, val, model;
4538 enum machine_mode mode;
4539 rtx addr, align, wdst;
4540 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
4541
4542 dst = operands[0];
4543 mem = operands[1];
4544 val = operands[2];
4545 model = operands[3];
4546 mode = GET_MODE (mem);
4547
4548 /* We forced the address into a register via mem_noofs_operand. */
4549 addr = XEXP (mem, 0);
4550 gcc_assert (register_operand (addr, DImode));
4551
4552 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4553 NULL_RTX, 1, OPTAB_DIRECT);
4554
4555 /* Insert val into the correct byte location within the word. */
4556 if (val != const0_rtx)
4557 val = emit_insxl (mode, val, addr);
4558
4559 wdst = gen_reg_rtx (DImode);
4560 if (mode == QImode)
4561 gen = gen_atomic_exchangeqi_1;
4562 else
4563 gen = gen_atomic_exchangehi_1;
4564 emit_insn (gen (wdst, mem, val, align, model));
4565
4566 emit_move_insn (dst, gen_lowpart (mode, wdst));
4567 }
4568
4569 void
4570 alpha_split_atomic_exchange_12 (rtx operands[])
4571 {
4572 rtx dest, orig_mem, addr, val, align, scratch;
4573 rtx label, mem, width, mask, x;
4574 enum machine_mode mode;
4575 enum memmodel model;
4576
4577 dest = operands[0];
4578 orig_mem = operands[1];
4579 val = operands[2];
4580 align = operands[3];
4581 model = (enum memmodel) INTVAL (operands[4]);
4582 scratch = operands[5];
4583 mode = GET_MODE (orig_mem);
4584 addr = XEXP (orig_mem, 0);
4585
4586 mem = gen_rtx_MEM (DImode, align);
4587 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4588 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4589 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4590
4591 alpha_pre_atomic_barrier (model);
4592
4593 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4594 emit_label (XEXP (label, 0));
4595
4596 emit_load_locked (DImode, scratch, mem);
4597
4598 width = GEN_INT (GET_MODE_BITSIZE (mode));
4599 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4600 emit_insn (gen_extxl (dest, scratch, width, addr));
4601 emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4602 if (val != const0_rtx)
4603 emit_insn (gen_iordi3 (scratch, scratch, val));
4604
4605 emit_store_conditional (DImode, scratch, mem, scratch);
4606
4607 x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4608 emit_unlikely_jump (x, label);
4609
4610 alpha_post_atomic_barrier (model);
4611 }
4612 \f
4613 /* Adjust the cost of a scheduling dependency. Return the new cost of
4614 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4615
4616 static int
4617 alpha_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4618 {
4619 enum attr_type dep_insn_type;
4620
4621 /* If the dependence is an anti-dependence, there is no cost. For an
4622 output dependence, there is sometimes a cost, but it doesn't seem
4623 worth handling those few cases. */
4624 if (REG_NOTE_KIND (link) != 0)
4625 return cost;
4626
4627 /* If we can't recognize the insns, we can't really do anything. */
4628 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4629 return cost;
4630
4631 dep_insn_type = get_attr_type (dep_insn);
4632
4633 /* Bring in the user-defined memory latency. */
4634 if (dep_insn_type == TYPE_ILD
4635 || dep_insn_type == TYPE_FLD
4636 || dep_insn_type == TYPE_LDSYM)
4637 cost += alpha_memory_latency-1;
4638
4639 /* Everything else handled in DFA bypasses now. */
4640
4641 return cost;
4642 }
4643
4644 /* The number of instructions that can be issued per cycle. */
4645
4646 static int
4647 alpha_issue_rate (void)
4648 {
4649 return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4650 }
4651
4652 /* How many alternative schedules to try. This should be as wide as the
4653 scheduling freedom in the DFA, but no wider. Making this value too
4654 large results extra work for the scheduler.
4655
4656 For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4657 alternative schedules. For EV5, we can choose between E0/E1 and
4658 FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
4659
4660 static int
4661 alpha_multipass_dfa_lookahead (void)
4662 {
4663 return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4664 }
4665 \f
4666 /* Machine-specific function data. */
4667
4668 struct GTY(()) alpha_links;
4669
4670 struct GTY(()) machine_function
4671 {
4672 /* For OSF. */
4673 const char *some_ld_name;
4674
4675 /* For TARGET_LD_BUGGY_LDGP. */
4676 rtx gp_save_rtx;
4677
4678 /* For VMS condition handlers. */
4679 bool uses_condition_handler;
4680
4681 /* Linkage entries. */
4682 splay_tree GTY ((param1_is (char *), param2_is (struct alpha_links *)))
4683 links;
4684 };
4685
4686 /* How to allocate a 'struct machine_function'. */
4687
4688 static struct machine_function *
4689 alpha_init_machine_status (void)
4690 {
4691 return ggc_alloc_cleared_machine_function ();
4692 }
4693
4694 /* Support for frame based VMS condition handlers. */
4695
4696 /* A VMS condition handler may be established for a function with a call to
4697 __builtin_establish_vms_condition_handler, and cancelled with a call to
4698 __builtin_revert_vms_condition_handler.
4699
4700 The VMS Condition Handling Facility knows about the existence of a handler
4701 from the procedure descriptor .handler field. As the VMS native compilers,
4702 we store the user specified handler's address at a fixed location in the
4703 stack frame and point the procedure descriptor at a common wrapper which
4704 fetches the real handler's address and issues an indirect call.
4705
4706 The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
4707
4708 We force the procedure kind to PT_STACK, and the fixed frame location is
4709 fp+8, just before the register save area. We use the handler_data field in
4710 the procedure descriptor to state the fp offset at which the installed
4711 handler address can be found. */
4712
4713 #define VMS_COND_HANDLER_FP_OFFSET 8
4714
4715 /* Expand code to store the currently installed user VMS condition handler
4716 into TARGET and install HANDLER as the new condition handler. */
4717
4718 void
4719 alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
4720 {
4721 rtx handler_slot_address
4722 = plus_constant (hard_frame_pointer_rtx, VMS_COND_HANDLER_FP_OFFSET);
4723
4724 rtx handler_slot
4725 = gen_rtx_MEM (DImode, handler_slot_address);
4726
4727 emit_move_insn (target, handler_slot);
4728 emit_move_insn (handler_slot, handler);
4729
4730 /* Notify the start/prologue/epilogue emitters that the condition handler
4731 slot is needed. In addition to reserving the slot space, this will force
4732 the procedure kind to PT_STACK so ensure that the hard_frame_pointer_rtx
4733 use above is correct. */
4734 cfun->machine->uses_condition_handler = true;
4735 }
4736
4737 /* Expand code to store the current VMS condition handler into TARGET and
4738 nullify it. */
4739
4740 void
4741 alpha_expand_builtin_revert_vms_condition_handler (rtx target)
4742 {
4743 /* We implement this by establishing a null condition handler, with the tiny
4744 side effect of setting uses_condition_handler. This is a little bit
4745 pessimistic if no actual builtin_establish call is ever issued, which is
4746 not a real problem and expected never to happen anyway. */
4747
4748 alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
4749 }
4750
4751 /* Functions to save and restore alpha_return_addr_rtx. */
4752
4753 /* Start the ball rolling with RETURN_ADDR_RTX. */
4754
4755 rtx
4756 alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4757 {
4758 if (count != 0)
4759 return const0_rtx;
4760
4761 return get_hard_reg_initial_val (Pmode, REG_RA);
4762 }
4763
4764 /* Return or create a memory slot containing the gp value for the current
4765 function. Needed only if TARGET_LD_BUGGY_LDGP. */
4766
4767 rtx
4768 alpha_gp_save_rtx (void)
4769 {
4770 rtx seq, m = cfun->machine->gp_save_rtx;
4771
4772 if (m == NULL)
4773 {
4774 start_sequence ();
4775
4776 m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4777 m = validize_mem (m);
4778 emit_move_insn (m, pic_offset_table_rtx);
4779
4780 seq = get_insns ();
4781 end_sequence ();
4782
4783 /* We used to simply emit the sequence after entry_of_function.
4784 However this breaks the CFG if the first instruction in the
4785 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
4786 label. Emit the sequence properly on the edge. We are only
4787 invoked from dw2_build_landing_pads and finish_eh_generation
4788 will call commit_edge_insertions thanks to a kludge. */
4789 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4790
4791 cfun->machine->gp_save_rtx = m;
4792 }
4793
4794 return m;
4795 }
4796
4797 static void
4798 alpha_instantiate_decls (void)
4799 {
4800 if (cfun->machine->gp_save_rtx != NULL_RTX)
4801 instantiate_decl_rtl (cfun->machine->gp_save_rtx);
4802 }
4803
4804 static int
4805 alpha_ra_ever_killed (void)
4806 {
4807 rtx top;
4808
4809 if (!has_hard_reg_initial_val (Pmode, REG_RA))
4810 return (int)df_regs_ever_live_p (REG_RA);
4811
4812 push_topmost_sequence ();
4813 top = get_insns ();
4814 pop_topmost_sequence ();
4815
4816 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX);
4817 }
4818
4819 \f
4820 /* Return the trap mode suffix applicable to the current
4821 instruction, or NULL. */
4822
4823 static const char *
4824 get_trap_mode_suffix (void)
4825 {
4826 enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
4827
4828 switch (s)
4829 {
4830 case TRAP_SUFFIX_NONE:
4831 return NULL;
4832
4833 case TRAP_SUFFIX_SU:
4834 if (alpha_fptm >= ALPHA_FPTM_SU)
4835 return "su";
4836 return NULL;
4837
4838 case TRAP_SUFFIX_SUI:
4839 if (alpha_fptm >= ALPHA_FPTM_SUI)
4840 return "sui";
4841 return NULL;
4842
4843 case TRAP_SUFFIX_V_SV:
4844 switch (alpha_fptm)
4845 {
4846 case ALPHA_FPTM_N:
4847 return NULL;
4848 case ALPHA_FPTM_U:
4849 return "v";
4850 case ALPHA_FPTM_SU:
4851 case ALPHA_FPTM_SUI:
4852 return "sv";
4853 default:
4854 gcc_unreachable ();
4855 }
4856
4857 case TRAP_SUFFIX_V_SV_SVI:
4858 switch (alpha_fptm)
4859 {
4860 case ALPHA_FPTM_N:
4861 return NULL;
4862 case ALPHA_FPTM_U:
4863 return "v";
4864 case ALPHA_FPTM_SU:
4865 return "sv";
4866 case ALPHA_FPTM_SUI:
4867 return "svi";
4868 default:
4869 gcc_unreachable ();
4870 }
4871 break;
4872
4873 case TRAP_SUFFIX_U_SU_SUI:
4874 switch (alpha_fptm)
4875 {
4876 case ALPHA_FPTM_N:
4877 return NULL;
4878 case ALPHA_FPTM_U:
4879 return "u";
4880 case ALPHA_FPTM_SU:
4881 return "su";
4882 case ALPHA_FPTM_SUI:
4883 return "sui";
4884 default:
4885 gcc_unreachable ();
4886 }
4887 break;
4888
4889 default:
4890 gcc_unreachable ();
4891 }
4892 gcc_unreachable ();
4893 }
4894
4895 /* Return the rounding mode suffix applicable to the current
4896 instruction, or NULL. */
4897
4898 static const char *
4899 get_round_mode_suffix (void)
4900 {
4901 enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
4902
4903 switch (s)
4904 {
4905 case ROUND_SUFFIX_NONE:
4906 return NULL;
4907 case ROUND_SUFFIX_NORMAL:
4908 switch (alpha_fprm)
4909 {
4910 case ALPHA_FPRM_NORM:
4911 return NULL;
4912 case ALPHA_FPRM_MINF:
4913 return "m";
4914 case ALPHA_FPRM_CHOP:
4915 return "c";
4916 case ALPHA_FPRM_DYN:
4917 return "d";
4918 default:
4919 gcc_unreachable ();
4920 }
4921 break;
4922
4923 case ROUND_SUFFIX_C:
4924 return "c";
4925
4926 default:
4927 gcc_unreachable ();
4928 }
4929 gcc_unreachable ();
4930 }
4931
4932 /* Locate some local-dynamic symbol still in use by this function
4933 so that we can print its name in some movdi_er_tlsldm pattern. */
4934
4935 static int
4936 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
4937 {
4938 rtx x = *px;
4939
4940 if (GET_CODE (x) == SYMBOL_REF
4941 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
4942 {
4943 cfun->machine->some_ld_name = XSTR (x, 0);
4944 return 1;
4945 }
4946
4947 return 0;
4948 }
4949
4950 static const char *
4951 get_some_local_dynamic_name (void)
4952 {
4953 rtx insn;
4954
4955 if (cfun->machine->some_ld_name)
4956 return cfun->machine->some_ld_name;
4957
4958 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
4959 if (INSN_P (insn)
4960 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
4961 return cfun->machine->some_ld_name;
4962
4963 gcc_unreachable ();
4964 }
4965
4966 /* Print an operand. Recognize special options, documented below. */
4967
4968 void
4969 print_operand (FILE *file, rtx x, int code)
4970 {
4971 int i;
4972
4973 switch (code)
4974 {
4975 case '~':
4976 /* Print the assembler name of the current function. */
4977 assemble_name (file, alpha_fnname);
4978 break;
4979
4980 case '&':
4981 assemble_name (file, get_some_local_dynamic_name ());
4982 break;
4983
4984 case '/':
4985 {
4986 const char *trap = get_trap_mode_suffix ();
4987 const char *round = get_round_mode_suffix ();
4988
4989 if (trap || round)
4990 fprintf (file, (TARGET_AS_SLASH_BEFORE_SUFFIX ? "/%s%s" : "%s%s"),
4991 (trap ? trap : ""), (round ? round : ""));
4992 break;
4993 }
4994
4995 case ',':
4996 /* Generates single precision instruction suffix. */
4997 fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
4998 break;
4999
5000 case '-':
5001 /* Generates double precision instruction suffix. */
5002 fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5003 break;
5004
5005 case '#':
5006 if (alpha_this_literal_sequence_number == 0)
5007 alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5008 fprintf (file, "%d", alpha_this_literal_sequence_number);
5009 break;
5010
5011 case '*':
5012 if (alpha_this_gpdisp_sequence_number == 0)
5013 alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5014 fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5015 break;
5016
5017 case 'H':
5018 if (GET_CODE (x) == HIGH)
5019 output_addr_const (file, XEXP (x, 0));
5020 else
5021 output_operand_lossage ("invalid %%H value");
5022 break;
5023
5024 case 'J':
5025 {
5026 const char *lituse;
5027
5028 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5029 {
5030 x = XVECEXP (x, 0, 0);
5031 lituse = "lituse_tlsgd";
5032 }
5033 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5034 {
5035 x = XVECEXP (x, 0, 0);
5036 lituse = "lituse_tlsldm";
5037 }
5038 else if (CONST_INT_P (x))
5039 lituse = "lituse_jsr";
5040 else
5041 {
5042 output_operand_lossage ("invalid %%J value");
5043 break;
5044 }
5045
5046 if (x != const0_rtx)
5047 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5048 }
5049 break;
5050
5051 case 'j':
5052 {
5053 const char *lituse;
5054
5055 #ifdef HAVE_AS_JSRDIRECT_RELOCS
5056 lituse = "lituse_jsrdirect";
5057 #else
5058 lituse = "lituse_jsr";
5059 #endif
5060
5061 gcc_assert (INTVAL (x) != 0);
5062 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5063 }
5064 break;
5065 case 'r':
5066 /* If this operand is the constant zero, write it as "$31". */
5067 if (REG_P (x))
5068 fprintf (file, "%s", reg_names[REGNO (x)]);
5069 else if (x == CONST0_RTX (GET_MODE (x)))
5070 fprintf (file, "$31");
5071 else
5072 output_operand_lossage ("invalid %%r value");
5073 break;
5074
5075 case 'R':
5076 /* Similar, but for floating-point. */
5077 if (REG_P (x))
5078 fprintf (file, "%s", reg_names[REGNO (x)]);
5079 else if (x == CONST0_RTX (GET_MODE (x)))
5080 fprintf (file, "$f31");
5081 else
5082 output_operand_lossage ("invalid %%R value");
5083 break;
5084
5085 case 'N':
5086 /* Write the 1's complement of a constant. */
5087 if (!CONST_INT_P (x))
5088 output_operand_lossage ("invalid %%N value");
5089
5090 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5091 break;
5092
5093 case 'P':
5094 /* Write 1 << C, for a constant C. */
5095 if (!CONST_INT_P (x))
5096 output_operand_lossage ("invalid %%P value");
5097
5098 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x));
5099 break;
5100
5101 case 'h':
5102 /* Write the high-order 16 bits of a constant, sign-extended. */
5103 if (!CONST_INT_P (x))
5104 output_operand_lossage ("invalid %%h value");
5105
5106 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5107 break;
5108
5109 case 'L':
5110 /* Write the low-order 16 bits of a constant, sign-extended. */
5111 if (!CONST_INT_P (x))
5112 output_operand_lossage ("invalid %%L value");
5113
5114 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5115 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5116 break;
5117
5118 case 'm':
5119 /* Write mask for ZAP insn. */
5120 if (GET_CODE (x) == CONST_DOUBLE)
5121 {
5122 HOST_WIDE_INT mask = 0;
5123 HOST_WIDE_INT value;
5124
5125 value = CONST_DOUBLE_LOW (x);
5126 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5127 i++, value >>= 8)
5128 if (value & 0xff)
5129 mask |= (1 << i);
5130
5131 value = CONST_DOUBLE_HIGH (x);
5132 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5133 i++, value >>= 8)
5134 if (value & 0xff)
5135 mask |= (1 << (i + sizeof (int)));
5136
5137 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff);
5138 }
5139
5140 else if (CONST_INT_P (x))
5141 {
5142 HOST_WIDE_INT mask = 0, value = INTVAL (x);
5143
5144 for (i = 0; i < 8; i++, value >>= 8)
5145 if (value & 0xff)
5146 mask |= (1 << i);
5147
5148 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5149 }
5150 else
5151 output_operand_lossage ("invalid %%m value");
5152 break;
5153
5154 case 'M':
5155 /* 'b', 'w', 'l', or 'q' as the value of the constant. */
5156 if (!CONST_INT_P (x)
5157 || (INTVAL (x) != 8 && INTVAL (x) != 16
5158 && INTVAL (x) != 32 && INTVAL (x) != 64))
5159 output_operand_lossage ("invalid %%M value");
5160
5161 fprintf (file, "%s",
5162 (INTVAL (x) == 8 ? "b"
5163 : INTVAL (x) == 16 ? "w"
5164 : INTVAL (x) == 32 ? "l"
5165 : "q"));
5166 break;
5167
5168 case 'U':
5169 /* Similar, except do it from the mask. */
5170 if (CONST_INT_P (x))
5171 {
5172 HOST_WIDE_INT value = INTVAL (x);
5173
5174 if (value == 0xff)
5175 {
5176 fputc ('b', file);
5177 break;
5178 }
5179 if (value == 0xffff)
5180 {
5181 fputc ('w', file);
5182 break;
5183 }
5184 if (value == 0xffffffff)
5185 {
5186 fputc ('l', file);
5187 break;
5188 }
5189 if (value == -1)
5190 {
5191 fputc ('q', file);
5192 break;
5193 }
5194 }
5195 else if (HOST_BITS_PER_WIDE_INT == 32
5196 && GET_CODE (x) == CONST_DOUBLE
5197 && CONST_DOUBLE_LOW (x) == 0xffffffff
5198 && CONST_DOUBLE_HIGH (x) == 0)
5199 {
5200 fputc ('l', file);
5201 break;
5202 }
5203 output_operand_lossage ("invalid %%U value");
5204 break;
5205
5206 case 's':
5207 /* Write the constant value divided by 8. */
5208 if (!CONST_INT_P (x)
5209 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5210 || (INTVAL (x) & 7) != 0)
5211 output_operand_lossage ("invalid %%s value");
5212
5213 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
5214 break;
5215
5216 case 'S':
5217 /* Same, except compute (64 - c) / 8 */
5218
5219 if (!CONST_INT_P (x)
5220 && (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5221 && (INTVAL (x) & 7) != 8)
5222 output_operand_lossage ("invalid %%s value");
5223
5224 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
5225 break;
5226
5227 case 'C': case 'D': case 'c': case 'd':
5228 /* Write out comparison name. */
5229 {
5230 enum rtx_code c = GET_CODE (x);
5231
5232 if (!COMPARISON_P (x))
5233 output_operand_lossage ("invalid %%C value");
5234
5235 else if (code == 'D')
5236 c = reverse_condition (c);
5237 else if (code == 'c')
5238 c = swap_condition (c);
5239 else if (code == 'd')
5240 c = swap_condition (reverse_condition (c));
5241
5242 if (c == LEU)
5243 fprintf (file, "ule");
5244 else if (c == LTU)
5245 fprintf (file, "ult");
5246 else if (c == UNORDERED)
5247 fprintf (file, "un");
5248 else
5249 fprintf (file, "%s", GET_RTX_NAME (c));
5250 }
5251 break;
5252
5253 case 'E':
5254 /* Write the divide or modulus operator. */
5255 switch (GET_CODE (x))
5256 {
5257 case DIV:
5258 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5259 break;
5260 case UDIV:
5261 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5262 break;
5263 case MOD:
5264 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5265 break;
5266 case UMOD:
5267 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5268 break;
5269 default:
5270 output_operand_lossage ("invalid %%E value");
5271 break;
5272 }
5273 break;
5274
5275 case 'A':
5276 /* Write "_u" for unaligned access. */
5277 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
5278 fprintf (file, "_u");
5279 break;
5280
5281 case 0:
5282 if (REG_P (x))
5283 fprintf (file, "%s", reg_names[REGNO (x)]);
5284 else if (MEM_P (x))
5285 output_address (XEXP (x, 0));
5286 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5287 {
5288 switch (XINT (XEXP (x, 0), 1))
5289 {
5290 case UNSPEC_DTPREL:
5291 case UNSPEC_TPREL:
5292 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5293 break;
5294 default:
5295 output_operand_lossage ("unknown relocation unspec");
5296 break;
5297 }
5298 }
5299 else
5300 output_addr_const (file, x);
5301 break;
5302
5303 default:
5304 output_operand_lossage ("invalid %%xn code");
5305 }
5306 }
5307
5308 void
5309 print_operand_address (FILE *file, rtx addr)
5310 {
5311 int basereg = 31;
5312 HOST_WIDE_INT offset = 0;
5313
5314 if (GET_CODE (addr) == AND)
5315 addr = XEXP (addr, 0);
5316
5317 if (GET_CODE (addr) == PLUS
5318 && CONST_INT_P (XEXP (addr, 1)))
5319 {
5320 offset = INTVAL (XEXP (addr, 1));
5321 addr = XEXP (addr, 0);
5322 }
5323
5324 if (GET_CODE (addr) == LO_SUM)
5325 {
5326 const char *reloc16, *reloclo;
5327 rtx op1 = XEXP (addr, 1);
5328
5329 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5330 {
5331 op1 = XEXP (op1, 0);
5332 switch (XINT (op1, 1))
5333 {
5334 case UNSPEC_DTPREL:
5335 reloc16 = NULL;
5336 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5337 break;
5338 case UNSPEC_TPREL:
5339 reloc16 = NULL;
5340 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5341 break;
5342 default:
5343 output_operand_lossage ("unknown relocation unspec");
5344 return;
5345 }
5346
5347 output_addr_const (file, XVECEXP (op1, 0, 0));
5348 }
5349 else
5350 {
5351 reloc16 = "gprel";
5352 reloclo = "gprellow";
5353 output_addr_const (file, op1);
5354 }
5355
5356 if (offset)
5357 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5358
5359 addr = XEXP (addr, 0);
5360 switch (GET_CODE (addr))
5361 {
5362 case REG:
5363 basereg = REGNO (addr);
5364 break;
5365
5366 case SUBREG:
5367 basereg = subreg_regno (addr);
5368 break;
5369
5370 default:
5371 gcc_unreachable ();
5372 }
5373
5374 fprintf (file, "($%d)\t\t!%s", basereg,
5375 (basereg == 29 ? reloc16 : reloclo));
5376 return;
5377 }
5378
5379 switch (GET_CODE (addr))
5380 {
5381 case REG:
5382 basereg = REGNO (addr);
5383 break;
5384
5385 case SUBREG:
5386 basereg = subreg_regno (addr);
5387 break;
5388
5389 case CONST_INT:
5390 offset = INTVAL (addr);
5391 break;
5392
5393 #if TARGET_ABI_OPEN_VMS
5394 case SYMBOL_REF:
5395 fprintf (file, "%s", XSTR (addr, 0));
5396 return;
5397
5398 case CONST:
5399 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5400 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5401 fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5402 XSTR (XEXP (XEXP (addr, 0), 0), 0),
5403 INTVAL (XEXP (XEXP (addr, 0), 1)));
5404 return;
5405
5406 #endif
5407 default:
5408 gcc_unreachable ();
5409 }
5410
5411 fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5412 }
5413 \f
5414 /* Emit RTL insns to initialize the variable parts of a trampoline at
5415 M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx
5416 for the static chain value for the function. */
5417
5418 static void
5419 alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5420 {
5421 rtx fnaddr, mem, word1, word2;
5422
5423 fnaddr = XEXP (DECL_RTL (fndecl), 0);
5424
5425 #ifdef POINTERS_EXTEND_UNSIGNED
5426 fnaddr = convert_memory_address (Pmode, fnaddr);
5427 chain_value = convert_memory_address (Pmode, chain_value);
5428 #endif
5429
5430 if (TARGET_ABI_OPEN_VMS)
5431 {
5432 const char *fnname;
5433 char *trname;
5434
5435 /* Construct the name of the trampoline entry point. */
5436 fnname = XSTR (fnaddr, 0);
5437 trname = (char *) alloca (strlen (fnname) + 5);
5438 strcpy (trname, fnname);
5439 strcat (trname, "..tr");
5440 fnname = ggc_alloc_string (trname, strlen (trname) + 1);
5441 word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
5442
5443 /* Trampoline (or "bounded") procedure descriptor is constructed from
5444 the function's procedure descriptor with certain fields zeroed IAW
5445 the VMS calling standard. This is stored in the first quadword. */
5446 word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
5447 word1 = expand_and (DImode, word1, GEN_INT (0xffff0fff0000fff0), NULL);
5448 }
5449 else
5450 {
5451 /* These 4 instructions are:
5452 ldq $1,24($27)
5453 ldq $27,16($27)
5454 jmp $31,($27),0
5455 nop
5456 We don't bother setting the HINT field of the jump; the nop
5457 is merely there for padding. */
5458 word1 = GEN_INT (0xa77b0010a43b0018);
5459 word2 = GEN_INT (0x47ff041f6bfb0000);
5460 }
5461
5462 /* Store the first two words, as computed above. */
5463 mem = adjust_address (m_tramp, DImode, 0);
5464 emit_move_insn (mem, word1);
5465 mem = adjust_address (m_tramp, DImode, 8);
5466 emit_move_insn (mem, word2);
5467
5468 /* Store function address and static chain value. */
5469 mem = adjust_address (m_tramp, Pmode, 16);
5470 emit_move_insn (mem, fnaddr);
5471 mem = adjust_address (m_tramp, Pmode, 24);
5472 emit_move_insn (mem, chain_value);
5473
5474 if (TARGET_ABI_OSF)
5475 {
5476 emit_insn (gen_imb ());
5477 #ifdef HAVE_ENABLE_EXECUTE_STACK
5478 emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5479 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
5480 #endif
5481 }
5482 }
5483 \f
5484 /* Determine where to put an argument to a function.
5485 Value is zero to push the argument on the stack,
5486 or a hard register in which to store the argument.
5487
5488 MODE is the argument's machine mode.
5489 TYPE is the data type of the argument (as a tree).
5490 This is null for libcalls where that information may
5491 not be available.
5492 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5493 the preceding args and about the function being called.
5494 NAMED is nonzero if this argument is a named parameter
5495 (otherwise it is an extra parameter matching an ellipsis).
5496
5497 On Alpha the first 6 words of args are normally in registers
5498 and the rest are pushed. */
5499
5500 static rtx
5501 alpha_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
5502 const_tree type, bool named ATTRIBUTE_UNUSED)
5503 {
5504 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5505 int basereg;
5506 int num_args;
5507
5508 /* Don't get confused and pass small structures in FP registers. */
5509 if (type && AGGREGATE_TYPE_P (type))
5510 basereg = 16;
5511 else
5512 {
5513 #ifdef ENABLE_CHECKING
5514 /* With alpha_split_complex_arg, we shouldn't see any raw complex
5515 values here. */
5516 gcc_assert (!COMPLEX_MODE_P (mode));
5517 #endif
5518
5519 /* Set up defaults for FP operands passed in FP registers, and
5520 integral operands passed in integer registers. */
5521 if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT)
5522 basereg = 32 + 16;
5523 else
5524 basereg = 16;
5525 }
5526
5527 /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5528 the two platforms, so we can't avoid conditional compilation. */
5529 #if TARGET_ABI_OPEN_VMS
5530 {
5531 if (mode == VOIDmode)
5532 return alpha_arg_info_reg_val (*cum);
5533
5534 num_args = cum->num_args;
5535 if (num_args >= 6
5536 || targetm.calls.must_pass_in_stack (mode, type))
5537 return NULL_RTX;
5538 }
5539 #elif TARGET_ABI_OSF
5540 {
5541 if (*cum >= 6)
5542 return NULL_RTX;
5543 num_args = *cum;
5544
5545 /* VOID is passed as a special flag for "last argument". */
5546 if (type == void_type_node)
5547 basereg = 16;
5548 else if (targetm.calls.must_pass_in_stack (mode, type))
5549 return NULL_RTX;
5550 }
5551 #else
5552 #error Unhandled ABI
5553 #endif
5554
5555 return gen_rtx_REG (mode, num_args + basereg);
5556 }
5557
5558 /* Update the data in CUM to advance over an argument
5559 of mode MODE and data type TYPE.
5560 (TYPE is null for libcalls where that information may not be available.) */
5561
5562 static void
5563 alpha_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
5564 const_tree type, bool named ATTRIBUTE_UNUSED)
5565 {
5566 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5567 bool onstack = targetm.calls.must_pass_in_stack (mode, type);
5568 int increment = onstack ? 6 : ALPHA_ARG_SIZE (mode, type, named);
5569
5570 #if TARGET_ABI_OSF
5571 *cum += increment;
5572 #else
5573 if (!onstack && cum->num_args < 6)
5574 cum->atypes[cum->num_args] = alpha_arg_type (mode);
5575 cum->num_args += increment;
5576 #endif
5577 }
5578
5579 static int
5580 alpha_arg_partial_bytes (cumulative_args_t cum_v,
5581 enum machine_mode mode ATTRIBUTE_UNUSED,
5582 tree type ATTRIBUTE_UNUSED,
5583 bool named ATTRIBUTE_UNUSED)
5584 {
5585 int words = 0;
5586 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);
5587
5588 #if TARGET_ABI_OPEN_VMS
5589 if (cum->num_args < 6
5590 && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named))
5591 words = 6 - cum->num_args;
5592 #elif TARGET_ABI_OSF
5593 if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named))
5594 words = 6 - *cum;
5595 #else
5596 #error Unhandled ABI
5597 #endif
5598
5599 return words * UNITS_PER_WORD;
5600 }
5601
5602
5603 /* Return true if TYPE must be returned in memory, instead of in registers. */
5604
5605 static bool
5606 alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5607 {
5608 enum machine_mode mode = VOIDmode;
5609 int size;
5610
5611 if (type)
5612 {
5613 mode = TYPE_MODE (type);
5614
5615 /* All aggregates are returned in memory, except on OpenVMS where
5616 records that fit 64 bits should be returned by immediate value
5617 as required by section 3.8.7.1 of the OpenVMS Calling Standard. */
5618 if (TARGET_ABI_OPEN_VMS
5619 && TREE_CODE (type) != ARRAY_TYPE
5620 && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8)
5621 return false;
5622
5623 if (AGGREGATE_TYPE_P (type))
5624 return true;
5625 }
5626
5627 size = GET_MODE_SIZE (mode);
5628 switch (GET_MODE_CLASS (mode))
5629 {
5630 case MODE_VECTOR_FLOAT:
5631 /* Pass all float vectors in memory, like an aggregate. */
5632 return true;
5633
5634 case MODE_COMPLEX_FLOAT:
5635 /* We judge complex floats on the size of their element,
5636 not the size of the whole type. */
5637 size = GET_MODE_UNIT_SIZE (mode);
5638 break;
5639
5640 case MODE_INT:
5641 case MODE_FLOAT:
5642 case MODE_COMPLEX_INT:
5643 case MODE_VECTOR_INT:
5644 break;
5645
5646 default:
5647 /* ??? We get called on all sorts of random stuff from
5648 aggregate_value_p. We must return something, but it's not
5649 clear what's safe to return. Pretend it's a struct I
5650 guess. */
5651 return true;
5652 }
5653
5654 /* Otherwise types must fit in one register. */
5655 return size > UNITS_PER_WORD;
5656 }
5657
5658 /* Return true if TYPE should be passed by invisible reference. */
5659
5660 static bool
5661 alpha_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
5662 enum machine_mode mode,
5663 const_tree type ATTRIBUTE_UNUSED,
5664 bool named ATTRIBUTE_UNUSED)
5665 {
5666 return mode == TFmode || mode == TCmode;
5667 }
5668
5669 /* Define how to find the value returned by a function. VALTYPE is the
5670 data type of the value (as a tree). If the precise function being
5671 called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5672 MODE is set instead of VALTYPE for libcalls.
5673
5674 On Alpha the value is found in $0 for integer functions and
5675 $f0 for floating-point functions. */
5676
5677 rtx
5678 function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
5679 enum machine_mode mode)
5680 {
5681 unsigned int regnum, dummy ATTRIBUTE_UNUSED;
5682 enum mode_class mclass;
5683
5684 gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5685
5686 if (valtype)
5687 mode = TYPE_MODE (valtype);
5688
5689 mclass = GET_MODE_CLASS (mode);
5690 switch (mclass)
5691 {
5692 case MODE_INT:
5693 /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
5694 where we have them returning both SImode and DImode. */
5695 if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
5696 PROMOTE_MODE (mode, dummy, valtype);
5697 /* FALLTHRU */
5698
5699 case MODE_COMPLEX_INT:
5700 case MODE_VECTOR_INT:
5701 regnum = 0;
5702 break;
5703
5704 case MODE_FLOAT:
5705 regnum = 32;
5706 break;
5707
5708 case MODE_COMPLEX_FLOAT:
5709 {
5710 enum machine_mode cmode = GET_MODE_INNER (mode);
5711
5712 return gen_rtx_PARALLEL
5713 (VOIDmode,
5714 gen_rtvec (2,
5715 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5716 const0_rtx),
5717 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5718 GEN_INT (GET_MODE_SIZE (cmode)))));
5719 }
5720
5721 case MODE_RANDOM:
5722 /* We should only reach here for BLKmode on VMS. */
5723 gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
5724 regnum = 0;
5725 break;
5726
5727 default:
5728 gcc_unreachable ();
5729 }
5730
5731 return gen_rtx_REG (mode, regnum);
5732 }
5733
5734 /* TCmode complex values are passed by invisible reference. We
5735 should not split these values. */
5736
5737 static bool
5738 alpha_split_complex_arg (const_tree type)
5739 {
5740 return TYPE_MODE (type) != TCmode;
5741 }
5742
5743 static tree
5744 alpha_build_builtin_va_list (void)
5745 {
5746 tree base, ofs, space, record, type_decl;
5747
5748 if (TARGET_ABI_OPEN_VMS)
5749 return ptr_type_node;
5750
5751 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5752 type_decl = build_decl (BUILTINS_LOCATION,
5753 TYPE_DECL, get_identifier ("__va_list_tag"), record);
5754 TYPE_STUB_DECL (record) = type_decl;
5755 TYPE_NAME (record) = type_decl;
5756
5757 /* C++? SET_IS_AGGR_TYPE (record, 1); */
5758
5759 /* Dummy field to prevent alignment warnings. */
5760 space = build_decl (BUILTINS_LOCATION,
5761 FIELD_DECL, NULL_TREE, integer_type_node);
5762 DECL_FIELD_CONTEXT (space) = record;
5763 DECL_ARTIFICIAL (space) = 1;
5764 DECL_IGNORED_P (space) = 1;
5765
5766 ofs = build_decl (BUILTINS_LOCATION,
5767 FIELD_DECL, get_identifier ("__offset"),
5768 integer_type_node);
5769 DECL_FIELD_CONTEXT (ofs) = record;
5770 DECL_CHAIN (ofs) = space;
5771 /* ??? This is a hack, __offset is marked volatile to prevent
5772 DCE that confuses stdarg optimization and results in
5773 gcc.c-torture/execute/stdarg-1.c failure. See PR 41089. */
5774 TREE_THIS_VOLATILE (ofs) = 1;
5775
5776 base = build_decl (BUILTINS_LOCATION,
5777 FIELD_DECL, get_identifier ("__base"),
5778 ptr_type_node);
5779 DECL_FIELD_CONTEXT (base) = record;
5780 DECL_CHAIN (base) = ofs;
5781
5782 TYPE_FIELDS (record) = base;
5783 layout_type (record);
5784
5785 va_list_gpr_counter_field = ofs;
5786 return record;
5787 }
5788
5789 #if TARGET_ABI_OSF
5790 /* Helper function for alpha_stdarg_optimize_hook. Skip over casts
5791 and constant additions. */
5792
5793 static gimple
5794 va_list_skip_additions (tree lhs)
5795 {
5796 gimple stmt;
5797
5798 for (;;)
5799 {
5800 enum tree_code code;
5801
5802 stmt = SSA_NAME_DEF_STMT (lhs);
5803
5804 if (gimple_code (stmt) == GIMPLE_PHI)
5805 return stmt;
5806
5807 if (!is_gimple_assign (stmt)
5808 || gimple_assign_lhs (stmt) != lhs)
5809 return NULL;
5810
5811 if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
5812 return stmt;
5813 code = gimple_assign_rhs_code (stmt);
5814 if (!CONVERT_EXPR_CODE_P (code)
5815 && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
5816 || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
5817 || !host_integerp (gimple_assign_rhs2 (stmt), 1)))
5818 return stmt;
5819
5820 lhs = gimple_assign_rhs1 (stmt);
5821 }
5822 }
5823
5824 /* Check if LHS = RHS statement is
5825 LHS = *(ap.__base + ap.__offset + cst)
5826 or
5827 LHS = *(ap.__base
5828 + ((ap.__offset + cst <= 47)
5829 ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
5830 If the former, indicate that GPR registers are needed,
5831 if the latter, indicate that FPR registers are needed.
5832
5833 Also look for LHS = (*ptr).field, where ptr is one of the forms
5834 listed above.
5835
5836 On alpha, cfun->va_list_gpr_size is used as size of the needed
5837 regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
5838 registers are needed and bit 1 set if FPR registers are needed.
5839 Return true if va_list references should not be scanned for the
5840 current statement. */
5841
5842 static bool
5843 alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt)
5844 {
5845 tree base, offset, rhs;
5846 int offset_arg = 1;
5847 gimple base_stmt;
5848
5849 if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
5850 != GIMPLE_SINGLE_RHS)
5851 return false;
5852
5853 rhs = gimple_assign_rhs1 (stmt);
5854 while (handled_component_p (rhs))
5855 rhs = TREE_OPERAND (rhs, 0);
5856 if (TREE_CODE (rhs) != MEM_REF
5857 || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
5858 return false;
5859
5860 stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
5861 if (stmt == NULL
5862 || !is_gimple_assign (stmt)
5863 || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
5864 return false;
5865
5866 base = gimple_assign_rhs1 (stmt);
5867 if (TREE_CODE (base) == SSA_NAME)
5868 {
5869 base_stmt = va_list_skip_additions (base);
5870 if (base_stmt
5871 && is_gimple_assign (base_stmt)
5872 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
5873 base = gimple_assign_rhs1 (base_stmt);
5874 }
5875
5876 if (TREE_CODE (base) != COMPONENT_REF
5877 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5878 {
5879 base = gimple_assign_rhs2 (stmt);
5880 if (TREE_CODE (base) == SSA_NAME)
5881 {
5882 base_stmt = va_list_skip_additions (base);
5883 if (base_stmt
5884 && is_gimple_assign (base_stmt)
5885 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
5886 base = gimple_assign_rhs1 (base_stmt);
5887 }
5888
5889 if (TREE_CODE (base) != COMPONENT_REF
5890 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5891 return false;
5892
5893 offset_arg = 0;
5894 }
5895
5896 base = get_base_address (base);
5897 if (TREE_CODE (base) != VAR_DECL
5898 || !bitmap_bit_p (si->va_list_vars, DECL_UID (base)))
5899 return false;
5900
5901 offset = gimple_op (stmt, 1 + offset_arg);
5902 if (TREE_CODE (offset) == SSA_NAME)
5903 {
5904 gimple offset_stmt = va_list_skip_additions (offset);
5905
5906 if (offset_stmt
5907 && gimple_code (offset_stmt) == GIMPLE_PHI)
5908 {
5909 HOST_WIDE_INT sub;
5910 gimple arg1_stmt, arg2_stmt;
5911 tree arg1, arg2;
5912 enum tree_code code1, code2;
5913
5914 if (gimple_phi_num_args (offset_stmt) != 2)
5915 goto escapes;
5916
5917 arg1_stmt
5918 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
5919 arg2_stmt
5920 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
5921 if (arg1_stmt == NULL
5922 || !is_gimple_assign (arg1_stmt)
5923 || arg2_stmt == NULL
5924 || !is_gimple_assign (arg2_stmt))
5925 goto escapes;
5926
5927 code1 = gimple_assign_rhs_code (arg1_stmt);
5928 code2 = gimple_assign_rhs_code (arg2_stmt);
5929 if (code1 == COMPONENT_REF
5930 && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
5931 /* Do nothing. */;
5932 else if (code2 == COMPONENT_REF
5933 && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
5934 {
5935 gimple tem = arg1_stmt;
5936 code2 = code1;
5937 arg1_stmt = arg2_stmt;
5938 arg2_stmt = tem;
5939 }
5940 else
5941 goto escapes;
5942
5943 if (!host_integerp (gimple_assign_rhs2 (arg2_stmt), 0))
5944 goto escapes;
5945
5946 sub = tree_low_cst (gimple_assign_rhs2 (arg2_stmt), 0);
5947 if (code2 == MINUS_EXPR)
5948 sub = -sub;
5949 if (sub < -48 || sub > -32)
5950 goto escapes;
5951
5952 arg1 = gimple_assign_rhs1 (arg1_stmt);
5953 arg2 = gimple_assign_rhs1 (arg2_stmt);
5954 if (TREE_CODE (arg2) == SSA_NAME)
5955 {
5956 arg2_stmt = va_list_skip_additions (arg2);
5957 if (arg2_stmt == NULL
5958 || !is_gimple_assign (arg2_stmt)
5959 || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
5960 goto escapes;
5961 arg2 = gimple_assign_rhs1 (arg2_stmt);
5962 }
5963 if (arg1 != arg2)
5964 goto escapes;
5965
5966 if (TREE_CODE (arg1) != COMPONENT_REF
5967 || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
5968 || get_base_address (arg1) != base)
5969 goto escapes;
5970
5971 /* Need floating point regs. */
5972 cfun->va_list_fpr_size |= 2;
5973 return false;
5974 }
5975 if (offset_stmt
5976 && is_gimple_assign (offset_stmt)
5977 && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
5978 offset = gimple_assign_rhs1 (offset_stmt);
5979 }
5980 if (TREE_CODE (offset) != COMPONENT_REF
5981 || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
5982 || get_base_address (offset) != base)
5983 goto escapes;
5984 else
5985 /* Need general regs. */
5986 cfun->va_list_fpr_size |= 1;
5987 return false;
5988
5989 escapes:
5990 si->va_list_escapes = true;
5991 return false;
5992 }
5993 #endif
5994
5995 /* Perform any needed actions needed for a function that is receiving a
5996 variable number of arguments. */
5997
5998 static void
5999 alpha_setup_incoming_varargs (cumulative_args_t pcum, enum machine_mode mode,
6000 tree type, int *pretend_size, int no_rtl)
6001 {
6002 CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);
6003
6004 /* Skip the current argument. */
6005 targetm.calls.function_arg_advance (pack_cumulative_args (&cum), mode, type,
6006 true);
6007
6008 #if TARGET_ABI_OPEN_VMS
6009 /* For VMS, we allocate space for all 6 arg registers plus a count.
6010
6011 However, if NO registers need to be saved, don't allocate any space.
6012 This is not only because we won't need the space, but because AP
6013 includes the current_pretend_args_size and we don't want to mess up
6014 any ap-relative addresses already made. */
6015 if (cum.num_args < 6)
6016 {
6017 if (!no_rtl)
6018 {
6019 emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6020 emit_insn (gen_arg_home ());
6021 }
6022 *pretend_size = 7 * UNITS_PER_WORD;
6023 }
6024 #else
6025 /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6026 only push those that are remaining. However, if NO registers need to
6027 be saved, don't allocate any space. This is not only because we won't
6028 need the space, but because AP includes the current_pretend_args_size
6029 and we don't want to mess up any ap-relative addresses already made.
6030
6031 If we are not to use the floating-point registers, save the integer
6032 registers where we would put the floating-point registers. This is
6033 not the most efficient way to implement varargs with just one register
6034 class, but it isn't worth doing anything more efficient in this rare
6035 case. */
6036 if (cum >= 6)
6037 return;
6038
6039 if (!no_rtl)
6040 {
6041 int count;
6042 alias_set_type set = get_varargs_alias_set ();
6043 rtx tmp;
6044
6045 count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6046 if (count > 6 - cum)
6047 count = 6 - cum;
6048
6049 /* Detect whether integer registers or floating-point registers
6050 are needed by the detected va_arg statements. See above for
6051 how these values are computed. Note that the "escape" value
6052 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
6053 these bits set. */
6054 gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6055
6056 if (cfun->va_list_fpr_size & 1)
6057 {
6058 tmp = gen_rtx_MEM (BLKmode,
6059 plus_constant (virtual_incoming_args_rtx,
6060 (cum + 6) * UNITS_PER_WORD));
6061 MEM_NOTRAP_P (tmp) = 1;
6062 set_mem_alias_set (tmp, set);
6063 move_block_from_reg (16 + cum, tmp, count);
6064 }
6065
6066 if (cfun->va_list_fpr_size & 2)
6067 {
6068 tmp = gen_rtx_MEM (BLKmode,
6069 plus_constant (virtual_incoming_args_rtx,
6070 cum * UNITS_PER_WORD));
6071 MEM_NOTRAP_P (tmp) = 1;
6072 set_mem_alias_set (tmp, set);
6073 move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6074 }
6075 }
6076 *pretend_size = 12 * UNITS_PER_WORD;
6077 #endif
6078 }
6079
6080 static void
6081 alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6082 {
6083 HOST_WIDE_INT offset;
6084 tree t, offset_field, base_field;
6085
6086 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6087 return;
6088
6089 /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6090 up by 48, storing fp arg registers in the first 48 bytes, and the
6091 integer arg registers in the next 48 bytes. This is only done,
6092 however, if any integer registers need to be stored.
6093
6094 If no integer registers need be stored, then we must subtract 48
6095 in order to account for the integer arg registers which are counted
6096 in argsize above, but which are not actually stored on the stack.
6097 Must further be careful here about structures straddling the last
6098 integer argument register; that futzes with pretend_args_size,
6099 which changes the meaning of AP. */
6100
6101 if (NUM_ARGS < 6)
6102 offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6103 else
6104 offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;
6105
6106 if (TARGET_ABI_OPEN_VMS)
6107 {
6108 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6109 t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
6110 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6111 TREE_SIDE_EFFECTS (t) = 1;
6112 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6113 }
6114 else
6115 {
6116 base_field = TYPE_FIELDS (TREE_TYPE (valist));
6117 offset_field = DECL_CHAIN (base_field);
6118
6119 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6120 valist, base_field, NULL_TREE);
6121 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6122 valist, offset_field, NULL_TREE);
6123
6124 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6125 t = fold_build_pointer_plus_hwi (t, offset);
6126 t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
6127 TREE_SIDE_EFFECTS (t) = 1;
6128 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6129
6130 t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6131 t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
6132 TREE_SIDE_EFFECTS (t) = 1;
6133 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6134 }
6135 }
6136
6137 static tree
6138 alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
6139 gimple_seq *pre_p)
6140 {
6141 tree type_size, ptr_type, addend, t, addr;
6142 gimple_seq internal_post;
6143
6144 /* If the type could not be passed in registers, skip the block
6145 reserved for the registers. */
6146 if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
6147 {
6148 t = build_int_cst (TREE_TYPE (offset), 6*8);
6149 gimplify_assign (offset,
6150 build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
6151 pre_p);
6152 }
6153
6154 addend = offset;
6155 ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);
6156
6157 if (TREE_CODE (type) == COMPLEX_TYPE)
6158 {
6159 tree real_part, imag_part, real_temp;
6160
6161 real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6162 offset, pre_p);
6163
6164 /* Copy the value into a new temporary, lest the formal temporary
6165 be reused out from under us. */
6166 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6167
6168 imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6169 offset, pre_p);
6170
6171 return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6172 }
6173 else if (TREE_CODE (type) == REAL_TYPE)
6174 {
6175 tree fpaddend, cond, fourtyeight;
6176
6177 fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6178 fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
6179 addend, fourtyeight);
6180 cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
6181 addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
6182 fpaddend, addend);
6183 }
6184
6185 /* Build the final address and force that value into a temporary. */
6186 addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
6187 internal_post = NULL;
6188 gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6189 gimple_seq_add_seq (pre_p, internal_post);
6190
6191 /* Update the offset field. */
6192 type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6193 if (type_size == NULL || TREE_OVERFLOW (type_size))
6194 t = size_zero_node;
6195 else
6196 {
6197 t = size_binop (PLUS_EXPR, type_size, size_int (7));
6198 t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6199 t = size_binop (MULT_EXPR, t, size_int (8));
6200 }
6201 t = fold_convert (TREE_TYPE (offset), t);
6202 gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
6203 pre_p);
6204
6205 return build_va_arg_indirect_ref (addr);
6206 }
6207
6208 static tree
6209 alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6210 gimple_seq *post_p)
6211 {
6212 tree offset_field, base_field, offset, base, t, r;
6213 bool indirect;
6214
6215 if (TARGET_ABI_OPEN_VMS)
6216 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6217
6218 base_field = TYPE_FIELDS (va_list_type_node);
6219 offset_field = DECL_CHAIN (base_field);
6220 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6221 valist, base_field, NULL_TREE);
6222 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6223 valist, offset_field, NULL_TREE);
6224
6225 /* Pull the fields of the structure out into temporaries. Since we never
6226 modify the base field, we can use a formal temporary. Sign-extend the
6227 offset field so that it's the proper width for pointer arithmetic. */
6228 base = get_formal_tmp_var (base_field, pre_p);
6229
6230 t = fold_convert (lang_hooks.types.type_for_size (64, 0), offset_field);
6231 offset = get_initialized_tmp_var (t, pre_p, NULL);
6232
6233 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6234 if (indirect)
6235 type = build_pointer_type_for_mode (type, ptr_mode, true);
6236
6237 /* Find the value. Note that this will be a stable indirection, or
6238 a composite of stable indirections in the case of complex. */
6239 r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6240
6241 /* Stuff the offset temporary back into its field. */
6242 gimplify_assign (unshare_expr (offset_field),
6243 fold_convert (TREE_TYPE (offset_field), offset), pre_p);
6244
6245 if (indirect)
6246 r = build_va_arg_indirect_ref (r);
6247
6248 return r;
6249 }
6250 \f
6251 /* Builtins. */
6252
6253 enum alpha_builtin
6254 {
6255 ALPHA_BUILTIN_CMPBGE,
6256 ALPHA_BUILTIN_EXTBL,
6257 ALPHA_BUILTIN_EXTWL,
6258 ALPHA_BUILTIN_EXTLL,
6259 ALPHA_BUILTIN_EXTQL,
6260 ALPHA_BUILTIN_EXTWH,
6261 ALPHA_BUILTIN_EXTLH,
6262 ALPHA_BUILTIN_EXTQH,
6263 ALPHA_BUILTIN_INSBL,
6264 ALPHA_BUILTIN_INSWL,
6265 ALPHA_BUILTIN_INSLL,
6266 ALPHA_BUILTIN_INSQL,
6267 ALPHA_BUILTIN_INSWH,
6268 ALPHA_BUILTIN_INSLH,
6269 ALPHA_BUILTIN_INSQH,
6270 ALPHA_BUILTIN_MSKBL,
6271 ALPHA_BUILTIN_MSKWL,
6272 ALPHA_BUILTIN_MSKLL,
6273 ALPHA_BUILTIN_MSKQL,
6274 ALPHA_BUILTIN_MSKWH,
6275 ALPHA_BUILTIN_MSKLH,
6276 ALPHA_BUILTIN_MSKQH,
6277 ALPHA_BUILTIN_UMULH,
6278 ALPHA_BUILTIN_ZAP,
6279 ALPHA_BUILTIN_ZAPNOT,
6280 ALPHA_BUILTIN_AMASK,
6281 ALPHA_BUILTIN_IMPLVER,
6282 ALPHA_BUILTIN_RPCC,
6283 ALPHA_BUILTIN_THREAD_POINTER,
6284 ALPHA_BUILTIN_SET_THREAD_POINTER,
6285 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6286 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
6287
6288 /* TARGET_MAX */
6289 ALPHA_BUILTIN_MINUB8,
6290 ALPHA_BUILTIN_MINSB8,
6291 ALPHA_BUILTIN_MINUW4,
6292 ALPHA_BUILTIN_MINSW4,
6293 ALPHA_BUILTIN_MAXUB8,
6294 ALPHA_BUILTIN_MAXSB8,
6295 ALPHA_BUILTIN_MAXUW4,
6296 ALPHA_BUILTIN_MAXSW4,
6297 ALPHA_BUILTIN_PERR,
6298 ALPHA_BUILTIN_PKLB,
6299 ALPHA_BUILTIN_PKWB,
6300 ALPHA_BUILTIN_UNPKBL,
6301 ALPHA_BUILTIN_UNPKBW,
6302
6303 /* TARGET_CIX */
6304 ALPHA_BUILTIN_CTTZ,
6305 ALPHA_BUILTIN_CTLZ,
6306 ALPHA_BUILTIN_CTPOP,
6307
6308 ALPHA_BUILTIN_max
6309 };
6310
6311 static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
6312 CODE_FOR_builtin_cmpbge,
6313 CODE_FOR_extbl,
6314 CODE_FOR_extwl,
6315 CODE_FOR_extll,
6316 CODE_FOR_extql,
6317 CODE_FOR_extwh,
6318 CODE_FOR_extlh,
6319 CODE_FOR_extqh,
6320 CODE_FOR_builtin_insbl,
6321 CODE_FOR_builtin_inswl,
6322 CODE_FOR_builtin_insll,
6323 CODE_FOR_insql,
6324 CODE_FOR_inswh,
6325 CODE_FOR_inslh,
6326 CODE_FOR_insqh,
6327 CODE_FOR_mskbl,
6328 CODE_FOR_mskwl,
6329 CODE_FOR_mskll,
6330 CODE_FOR_mskql,
6331 CODE_FOR_mskwh,
6332 CODE_FOR_msklh,
6333 CODE_FOR_mskqh,
6334 CODE_FOR_umuldi3_highpart,
6335 CODE_FOR_builtin_zap,
6336 CODE_FOR_builtin_zapnot,
6337 CODE_FOR_builtin_amask,
6338 CODE_FOR_builtin_implver,
6339 CODE_FOR_builtin_rpcc,
6340 CODE_FOR_load_tp,
6341 CODE_FOR_set_tp,
6342 CODE_FOR_builtin_establish_vms_condition_handler,
6343 CODE_FOR_builtin_revert_vms_condition_handler,
6344
6345 /* TARGET_MAX */
6346 CODE_FOR_builtin_minub8,
6347 CODE_FOR_builtin_minsb8,
6348 CODE_FOR_builtin_minuw4,
6349 CODE_FOR_builtin_minsw4,
6350 CODE_FOR_builtin_maxub8,
6351 CODE_FOR_builtin_maxsb8,
6352 CODE_FOR_builtin_maxuw4,
6353 CODE_FOR_builtin_maxsw4,
6354 CODE_FOR_builtin_perr,
6355 CODE_FOR_builtin_pklb,
6356 CODE_FOR_builtin_pkwb,
6357 CODE_FOR_builtin_unpkbl,
6358 CODE_FOR_builtin_unpkbw,
6359
6360 /* TARGET_CIX */
6361 CODE_FOR_ctzdi2,
6362 CODE_FOR_clzdi2,
6363 CODE_FOR_popcountdi2
6364 };
6365
6366 struct alpha_builtin_def
6367 {
6368 const char *name;
6369 enum alpha_builtin code;
6370 unsigned int target_mask;
6371 bool is_const;
6372 };
6373
6374 static struct alpha_builtin_def const zero_arg_builtins[] = {
6375 { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true },
6376 { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false }
6377 };
6378
6379 static struct alpha_builtin_def const one_arg_builtins[] = {
6380 { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true },
6381 { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true },
6382 { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true },
6383 { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true },
6384 { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true },
6385 { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true },
6386 { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true },
6387 { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true }
6388 };
6389
6390 static struct alpha_builtin_def const two_arg_builtins[] = {
6391 { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true },
6392 { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true },
6393 { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true },
6394 { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true },
6395 { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true },
6396 { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true },
6397 { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true },
6398 { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true },
6399 { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true },
6400 { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true },
6401 { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true },
6402 { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true },
6403 { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true },
6404 { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true },
6405 { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true },
6406 { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true },
6407 { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true },
6408 { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true },
6409 { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true },
6410 { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true },
6411 { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true },
6412 { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true },
6413 { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true },
6414 { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true },
6415 { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true },
6416 { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true },
6417 { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true },
6418 { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true },
6419 { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true },
6420 { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true },
6421 { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true },
6422 { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true },
6423 { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true },
6424 { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true }
6425 };
6426
6427 static GTY(()) tree alpha_v8qi_u;
6428 static GTY(()) tree alpha_v8qi_s;
6429 static GTY(()) tree alpha_v4hi_u;
6430 static GTY(()) tree alpha_v4hi_s;
6431
6432 static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];
6433
6434 /* Return the alpha builtin for CODE. */
6435
6436 static tree
6437 alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6438 {
6439 if (code >= ALPHA_BUILTIN_max)
6440 return error_mark_node;
6441 return alpha_builtins[code];
6442 }
6443
6444 /* Helper function of alpha_init_builtins. Add the built-in specified
6445 by NAME, TYPE, CODE, and ECF. */
6446
6447 static void
6448 alpha_builtin_function (const char *name, tree ftype,
6449 enum alpha_builtin code, unsigned ecf)
6450 {
6451 tree decl = add_builtin_function (name, ftype, (int) code,
6452 BUILT_IN_MD, NULL, NULL_TREE);
6453
6454 if (ecf & ECF_CONST)
6455 TREE_READONLY (decl) = 1;
6456 if (ecf & ECF_NOTHROW)
6457 TREE_NOTHROW (decl) = 1;
6458
6459 alpha_builtins [(int) code] = decl;
6460 }
6461
6462 /* Helper function of alpha_init_builtins. Add the COUNT built-in
6463 functions pointed to by P, with function type FTYPE. */
6464
6465 static void
6466 alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
6467 tree ftype)
6468 {
6469 size_t i;
6470
6471 for (i = 0; i < count; ++i, ++p)
6472 if ((target_flags & p->target_mask) == p->target_mask)
6473 alpha_builtin_function (p->name, ftype, p->code,
6474 (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
6475 }
6476
6477 static void
6478 alpha_init_builtins (void)
6479 {
6480 tree dimode_integer_type_node;
6481 tree ftype;
6482
6483 dimode_integer_type_node = lang_hooks.types.type_for_mode (DImode, 0);
6484
6485 ftype = build_function_type_list (dimode_integer_type_node, NULL_TREE);
6486 alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins),
6487 ftype);
6488
6489 ftype = build_function_type_list (dimode_integer_type_node,
6490 dimode_integer_type_node, NULL_TREE);
6491 alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins),
6492 ftype);
6493
6494 ftype = build_function_type_list (dimode_integer_type_node,
6495 dimode_integer_type_node,
6496 dimode_integer_type_node, NULL_TREE);
6497 alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins),
6498 ftype);
6499
6500 ftype = build_function_type_list (ptr_type_node, NULL_TREE);
6501 alpha_builtin_function ("__builtin_thread_pointer", ftype,
6502 ALPHA_BUILTIN_THREAD_POINTER, ECF_NOTHROW);
6503
6504 ftype = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
6505 alpha_builtin_function ("__builtin_set_thread_pointer", ftype,
6506 ALPHA_BUILTIN_SET_THREAD_POINTER, ECF_NOTHROW);
6507
6508 if (TARGET_ABI_OPEN_VMS)
6509 {
6510 ftype = build_function_type_list (ptr_type_node, ptr_type_node,
6511 NULL_TREE);
6512 alpha_builtin_function ("__builtin_establish_vms_condition_handler",
6513 ftype,
6514 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6515 0);
6516
6517 ftype = build_function_type_list (ptr_type_node, void_type_node,
6518 NULL_TREE);
6519 alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
6520 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);
6521
6522 vms_patch_builtins ();
6523 }
6524
6525 alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6526 alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6527 alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6528 alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6529 }
6530
6531 /* Expand an expression EXP that calls a built-in function,
6532 with result going to TARGET if that's convenient
6533 (and in mode MODE if that's convenient).
6534 SUBTARGET may be used as the target for computing one of EXP's operands.
6535 IGNORE is nonzero if the value is to be ignored. */
6536
6537 static rtx
6538 alpha_expand_builtin (tree exp, rtx target,
6539 rtx subtarget ATTRIBUTE_UNUSED,
6540 enum machine_mode mode ATTRIBUTE_UNUSED,
6541 int ignore ATTRIBUTE_UNUSED)
6542 {
6543 #define MAX_ARGS 2
6544
6545 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6546 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6547 tree arg;
6548 call_expr_arg_iterator iter;
6549 enum insn_code icode;
6550 rtx op[MAX_ARGS], pat;
6551 int arity;
6552 bool nonvoid;
6553
6554 if (fcode >= ALPHA_BUILTIN_max)
6555 internal_error ("bad builtin fcode");
6556 icode = code_for_builtin[fcode];
6557 if (icode == 0)
6558 internal_error ("bad builtin fcode");
6559
6560 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6561
6562 arity = 0;
6563 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
6564 {
6565 const struct insn_operand_data *insn_op;
6566
6567 if (arg == error_mark_node)
6568 return NULL_RTX;
6569 if (arity > MAX_ARGS)
6570 return NULL_RTX;
6571
6572 insn_op = &insn_data[icode].operand[arity + nonvoid];
6573
6574 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
6575
6576 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6577 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6578 arity++;
6579 }
6580
6581 if (nonvoid)
6582 {
6583 enum machine_mode tmode = insn_data[icode].operand[0].mode;
6584 if (!target
6585 || GET_MODE (target) != tmode
6586 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6587 target = gen_reg_rtx (tmode);
6588 }
6589
6590 switch (arity)
6591 {
6592 case 0:
6593 pat = GEN_FCN (icode) (target);
6594 break;
6595 case 1:
6596 if (nonvoid)
6597 pat = GEN_FCN (icode) (target, op[0]);
6598 else
6599 pat = GEN_FCN (icode) (op[0]);
6600 break;
6601 case 2:
6602 pat = GEN_FCN (icode) (target, op[0], op[1]);
6603 break;
6604 default:
6605 gcc_unreachable ();
6606 }
6607 if (!pat)
6608 return NULL_RTX;
6609 emit_insn (pat);
6610
6611 if (nonvoid)
6612 return target;
6613 else
6614 return const0_rtx;
6615 }
6616
6617
6618 /* Several bits below assume HWI >= 64 bits. This should be enforced
6619 by config.gcc. */
6620 #if HOST_BITS_PER_WIDE_INT < 64
6621 # error "HOST_WIDE_INT too small"
6622 #endif
6623
6624 /* Fold the builtin for the CMPBGE instruction. This is a vector comparison
6625 with an 8-bit output vector. OPINT contains the integer operands; bit N
6626 of OP_CONST is set if OPINT[N] is valid. */
6627
6628 static tree
6629 alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6630 {
6631 if (op_const == 3)
6632 {
6633 int i, val;
6634 for (i = 0, val = 0; i < 8; ++i)
6635 {
6636 unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6637 unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6638 if (c0 >= c1)
6639 val |= 1 << i;
6640 }
6641 return build_int_cst (long_integer_type_node, val);
6642 }
6643 else if (op_const == 2 && opint[1] == 0)
6644 return build_int_cst (long_integer_type_node, 0xff);
6645 return NULL;
6646 }
6647
6648 /* Fold the builtin for the ZAPNOT instruction. This is essentially a
6649 specialized form of an AND operation. Other byte manipulation instructions
6650 are defined in terms of this instruction, so this is also used as a
6651 subroutine for other builtins.
6652
6653 OP contains the tree operands; OPINT contains the extracted integer values.
6654 Bit N of OP_CONST it set if OPINT[N] is valid. OP may be null if only
6655 OPINT may be considered. */
6656
6657 static tree
6658 alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6659 long op_const)
6660 {
6661 if (op_const & 2)
6662 {
6663 unsigned HOST_WIDE_INT mask = 0;
6664 int i;
6665
6666 for (i = 0; i < 8; ++i)
6667 if ((opint[1] >> i) & 1)
6668 mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6669
6670 if (op_const & 1)
6671 return build_int_cst (long_integer_type_node, opint[0] & mask);
6672
6673 if (op)
6674 return fold_build2 (BIT_AND_EXPR, long_integer_type_node, op[0],
6675 build_int_cst (long_integer_type_node, mask));
6676 }
6677 else if ((op_const & 1) && opint[0] == 0)
6678 return build_int_cst (long_integer_type_node, 0);
6679 return NULL;
6680 }
6681
6682 /* Fold the builtins for the EXT family of instructions. */
6683
6684 static tree
6685 alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6686 long op_const, unsigned HOST_WIDE_INT bytemask,
6687 bool is_high)
6688 {
6689 long zap_const = 2;
6690 tree *zap_op = NULL;
6691
6692 if (op_const & 2)
6693 {
6694 unsigned HOST_WIDE_INT loc;
6695
6696 loc = opint[1] & 7;
6697 loc *= BITS_PER_UNIT;
6698
6699 if (loc != 0)
6700 {
6701 if (op_const & 1)
6702 {
6703 unsigned HOST_WIDE_INT temp = opint[0];
6704 if (is_high)
6705 temp <<= loc;
6706 else
6707 temp >>= loc;
6708 opint[0] = temp;
6709 zap_const = 3;
6710 }
6711 }
6712 else
6713 zap_op = op;
6714 }
6715
6716 opint[1] = bytemask;
6717 return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
6718 }
6719
6720 /* Fold the builtins for the INS family of instructions. */
6721
6722 static tree
6723 alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6724 long op_const, unsigned HOST_WIDE_INT bytemask,
6725 bool is_high)
6726 {
6727 if ((op_const & 1) && opint[0] == 0)
6728 return build_int_cst (long_integer_type_node, 0);
6729
6730 if (op_const & 2)
6731 {
6732 unsigned HOST_WIDE_INT temp, loc, byteloc;
6733 tree *zap_op = NULL;
6734
6735 loc = opint[1] & 7;
6736 bytemask <<= loc;
6737
6738 temp = opint[0];
6739 if (is_high)
6740 {
6741 byteloc = (64 - (loc * 8)) & 0x3f;
6742 if (byteloc == 0)
6743 zap_op = op;
6744 else
6745 temp >>= byteloc;
6746 bytemask >>= 8;
6747 }
6748 else
6749 {
6750 byteloc = loc * 8;
6751 if (byteloc == 0)
6752 zap_op = op;
6753 else
6754 temp <<= byteloc;
6755 }
6756
6757 opint[0] = temp;
6758 opint[1] = bytemask;
6759 return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6760 }
6761
6762 return NULL;
6763 }
6764
6765 static tree
6766 alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6767 long op_const, unsigned HOST_WIDE_INT bytemask,
6768 bool is_high)
6769 {
6770 if (op_const & 2)
6771 {
6772 unsigned HOST_WIDE_INT loc;
6773
6774 loc = opint[1] & 7;
6775 bytemask <<= loc;
6776
6777 if (is_high)
6778 bytemask >>= 8;
6779
6780 opint[1] = bytemask ^ 0xff;
6781 }
6782
6783 return alpha_fold_builtin_zapnot (op, opint, op_const);
6784 }
6785
6786 static tree
6787 alpha_fold_builtin_umulh (unsigned HOST_WIDE_INT opint[], long op_const)
6788 {
6789 switch (op_const)
6790 {
6791 case 3:
6792 {
6793 unsigned HOST_WIDE_INT l;
6794 HOST_WIDE_INT h;
6795
6796 mul_double (opint[0], 0, opint[1], 0, &l, &h);
6797
6798 #if HOST_BITS_PER_WIDE_INT > 64
6799 # error fixme
6800 #endif
6801
6802 return build_int_cst (long_integer_type_node, h);
6803 }
6804
6805 case 1:
6806 opint[1] = opint[0];
6807 /* FALLTHRU */
6808 case 2:
6809 /* Note that (X*1) >> 64 == 0. */
6810 if (opint[1] == 0 || opint[1] == 1)
6811 return build_int_cst (long_integer_type_node, 0);
6812 break;
6813 }
6814 return NULL;
6815 }
6816
6817 static tree
6818 alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6819 {
6820 tree op0 = fold_convert (vtype, op[0]);
6821 tree op1 = fold_convert (vtype, op[1]);
6822 tree val = fold_build2 (code, vtype, op0, op1);
6823 return fold_build1 (VIEW_CONVERT_EXPR, long_integer_type_node, val);
6824 }
6825
6826 static tree
6827 alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6828 {
6829 unsigned HOST_WIDE_INT temp = 0;
6830 int i;
6831
6832 if (op_const != 3)
6833 return NULL;
6834
6835 for (i = 0; i < 8; ++i)
6836 {
6837 unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6838 unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6839 if (a >= b)
6840 temp += a - b;
6841 else
6842 temp += b - a;
6843 }
6844
6845 return build_int_cst (long_integer_type_node, temp);
6846 }
6847
6848 static tree
6849 alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6850 {
6851 unsigned HOST_WIDE_INT temp;
6852
6853 if (op_const == 0)
6854 return NULL;
6855
6856 temp = opint[0] & 0xff;
6857 temp |= (opint[0] >> 24) & 0xff00;
6858
6859 return build_int_cst (long_integer_type_node, temp);
6860 }
6861
6862 static tree
6863 alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
6864 {
6865 unsigned HOST_WIDE_INT temp;
6866
6867 if (op_const == 0)
6868 return NULL;
6869
6870 temp = opint[0] & 0xff;
6871 temp |= (opint[0] >> 8) & 0xff00;
6872 temp |= (opint[0] >> 16) & 0xff0000;
6873 temp |= (opint[0] >> 24) & 0xff000000;
6874
6875 return build_int_cst (long_integer_type_node, temp);
6876 }
6877
6878 static tree
6879 alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
6880 {
6881 unsigned HOST_WIDE_INT temp;
6882
6883 if (op_const == 0)
6884 return NULL;
6885
6886 temp = opint[0] & 0xff;
6887 temp |= (opint[0] & 0xff00) << 24;
6888
6889 return build_int_cst (long_integer_type_node, temp);
6890 }
6891
6892 static tree
6893 alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
6894 {
6895 unsigned HOST_WIDE_INT temp;
6896
6897 if (op_const == 0)
6898 return NULL;
6899
6900 temp = opint[0] & 0xff;
6901 temp |= (opint[0] & 0x0000ff00) << 8;
6902 temp |= (opint[0] & 0x00ff0000) << 16;
6903 temp |= (opint[0] & 0xff000000) << 24;
6904
6905 return build_int_cst (long_integer_type_node, temp);
6906 }
6907
6908 static tree
6909 alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
6910 {
6911 unsigned HOST_WIDE_INT temp;
6912
6913 if (op_const == 0)
6914 return NULL;
6915
6916 if (opint[0] == 0)
6917 temp = 64;
6918 else
6919 temp = exact_log2 (opint[0] & -opint[0]);
6920
6921 return build_int_cst (long_integer_type_node, temp);
6922 }
6923
6924 static tree
6925 alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
6926 {
6927 unsigned HOST_WIDE_INT temp;
6928
6929 if (op_const == 0)
6930 return NULL;
6931
6932 if (opint[0] == 0)
6933 temp = 64;
6934 else
6935 temp = 64 - floor_log2 (opint[0]) - 1;
6936
6937 return build_int_cst (long_integer_type_node, temp);
6938 }
6939
6940 static tree
6941 alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
6942 {
6943 unsigned HOST_WIDE_INT temp, op;
6944
6945 if (op_const == 0)
6946 return NULL;
6947
6948 op = opint[0];
6949 temp = 0;
6950 while (op)
6951 temp++, op &= op - 1;
6952
6953 return build_int_cst (long_integer_type_node, temp);
6954 }
6955
6956 /* Fold one of our builtin functions. */
6957
6958 static tree
6959 alpha_fold_builtin (tree fndecl, int n_args, tree *op,
6960 bool ignore ATTRIBUTE_UNUSED)
6961 {
6962 unsigned HOST_WIDE_INT opint[MAX_ARGS];
6963 long op_const = 0;
6964 int i;
6965
6966 if (n_args >= MAX_ARGS)
6967 return NULL;
6968
6969 for (i = 0; i < n_args; i++)
6970 {
6971 tree arg = op[i];
6972 if (arg == error_mark_node)
6973 return NULL;
6974
6975 opint[i] = 0;
6976 if (TREE_CODE (arg) == INTEGER_CST)
6977 {
6978 op_const |= 1L << i;
6979 opint[i] = int_cst_value (arg);
6980 }
6981 }
6982
6983 switch (DECL_FUNCTION_CODE (fndecl))
6984 {
6985 case ALPHA_BUILTIN_CMPBGE:
6986 return alpha_fold_builtin_cmpbge (opint, op_const);
6987
6988 case ALPHA_BUILTIN_EXTBL:
6989 return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
6990 case ALPHA_BUILTIN_EXTWL:
6991 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
6992 case ALPHA_BUILTIN_EXTLL:
6993 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
6994 case ALPHA_BUILTIN_EXTQL:
6995 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
6996 case ALPHA_BUILTIN_EXTWH:
6997 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
6998 case ALPHA_BUILTIN_EXTLH:
6999 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
7000 case ALPHA_BUILTIN_EXTQH:
7001 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
7002
7003 case ALPHA_BUILTIN_INSBL:
7004 return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
7005 case ALPHA_BUILTIN_INSWL:
7006 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
7007 case ALPHA_BUILTIN_INSLL:
7008 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
7009 case ALPHA_BUILTIN_INSQL:
7010 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
7011 case ALPHA_BUILTIN_INSWH:
7012 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
7013 case ALPHA_BUILTIN_INSLH:
7014 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
7015 case ALPHA_BUILTIN_INSQH:
7016 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
7017
7018 case ALPHA_BUILTIN_MSKBL:
7019 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
7020 case ALPHA_BUILTIN_MSKWL:
7021 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
7022 case ALPHA_BUILTIN_MSKLL:
7023 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
7024 case ALPHA_BUILTIN_MSKQL:
7025 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
7026 case ALPHA_BUILTIN_MSKWH:
7027 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
7028 case ALPHA_BUILTIN_MSKLH:
7029 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
7030 case ALPHA_BUILTIN_MSKQH:
7031 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
7032
7033 case ALPHA_BUILTIN_UMULH:
7034 return alpha_fold_builtin_umulh (opint, op_const);
7035
7036 case ALPHA_BUILTIN_ZAP:
7037 opint[1] ^= 0xff;
7038 /* FALLTHRU */
7039 case ALPHA_BUILTIN_ZAPNOT:
7040 return alpha_fold_builtin_zapnot (op, opint, op_const);
7041
7042 case ALPHA_BUILTIN_MINUB8:
7043 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7044 case ALPHA_BUILTIN_MINSB8:
7045 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7046 case ALPHA_BUILTIN_MINUW4:
7047 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7048 case ALPHA_BUILTIN_MINSW4:
7049 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7050 case ALPHA_BUILTIN_MAXUB8:
7051 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7052 case ALPHA_BUILTIN_MAXSB8:
7053 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7054 case ALPHA_BUILTIN_MAXUW4:
7055 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7056 case ALPHA_BUILTIN_MAXSW4:
7057 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7058
7059 case ALPHA_BUILTIN_PERR:
7060 return alpha_fold_builtin_perr (opint, op_const);
7061 case ALPHA_BUILTIN_PKLB:
7062 return alpha_fold_builtin_pklb (opint, op_const);
7063 case ALPHA_BUILTIN_PKWB:
7064 return alpha_fold_builtin_pkwb (opint, op_const);
7065 case ALPHA_BUILTIN_UNPKBL:
7066 return alpha_fold_builtin_unpkbl (opint, op_const);
7067 case ALPHA_BUILTIN_UNPKBW:
7068 return alpha_fold_builtin_unpkbw (opint, op_const);
7069
7070 case ALPHA_BUILTIN_CTTZ:
7071 return alpha_fold_builtin_cttz (opint, op_const);
7072 case ALPHA_BUILTIN_CTLZ:
7073 return alpha_fold_builtin_ctlz (opint, op_const);
7074 case ALPHA_BUILTIN_CTPOP:
7075 return alpha_fold_builtin_ctpop (opint, op_const);
7076
7077 case ALPHA_BUILTIN_AMASK:
7078 case ALPHA_BUILTIN_IMPLVER:
7079 case ALPHA_BUILTIN_RPCC:
7080 case ALPHA_BUILTIN_THREAD_POINTER:
7081 case ALPHA_BUILTIN_SET_THREAD_POINTER:
7082 /* None of these are foldable at compile-time. */
7083 default:
7084 return NULL;
7085 }
7086 }
7087 \f
7088 /* This page contains routines that are used to determine what the function
7089 prologue and epilogue code will do and write them out. */
7090
7091 /* Compute the size of the save area in the stack. */
7092
7093 /* These variables are used for communication between the following functions.
7094 They indicate various things about the current function being compiled
7095 that are used to tell what kind of prologue, epilogue and procedure
7096 descriptor to generate. */
7097
7098 /* Nonzero if we need a stack procedure. */
7099 enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7100 static enum alpha_procedure_types alpha_procedure_type;
7101
7102 /* Register number (either FP or SP) that is used to unwind the frame. */
7103 static int vms_unwind_regno;
7104
7105 /* Register number used to save FP. We need not have one for RA since
7106 we don't modify it for register procedures. This is only defined
7107 for register frame procedures. */
7108 static int vms_save_fp_regno;
7109
7110 /* Register number used to reference objects off our PV. */
7111 static int vms_base_regno;
7112
7113 /* Compute register masks for saved registers. */
7114
7115 static void
7116 alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP)
7117 {
7118 unsigned long imask = 0;
7119 unsigned long fmask = 0;
7120 unsigned int i;
7121
7122 /* When outputting a thunk, we don't have valid register life info,
7123 but assemble_start_function wants to output .frame and .mask
7124 directives. */
7125 if (cfun->is_thunk)
7126 {
7127 *imaskP = 0;
7128 *fmaskP = 0;
7129 return;
7130 }
7131
7132 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7133 imask |= (1UL << HARD_FRAME_POINTER_REGNUM);
7134
7135 /* One for every register we have to save. */
7136 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7137 if (! fixed_regs[i] && ! call_used_regs[i]
7138 && df_regs_ever_live_p (i) && i != REG_RA)
7139 {
7140 if (i < 32)
7141 imask |= (1UL << i);
7142 else
7143 fmask |= (1UL << (i - 32));
7144 }
7145
7146 /* We need to restore these for the handler. */
7147 if (crtl->calls_eh_return)
7148 {
7149 for (i = 0; ; ++i)
7150 {
7151 unsigned regno = EH_RETURN_DATA_REGNO (i);
7152 if (regno == INVALID_REGNUM)
7153 break;
7154 imask |= 1UL << regno;
7155 }
7156 }
7157
7158 /* If any register spilled, then spill the return address also. */
7159 /* ??? This is required by the Digital stack unwind specification
7160 and isn't needed if we're doing Dwarf2 unwinding. */
7161 if (imask || fmask || alpha_ra_ever_killed ())
7162 imask |= (1UL << REG_RA);
7163
7164 *imaskP = imask;
7165 *fmaskP = fmask;
7166 }
7167
7168 int
7169 alpha_sa_size (void)
7170 {
7171 unsigned long mask[2];
7172 int sa_size = 0;
7173 int i, j;
7174
7175 alpha_sa_mask (&mask[0], &mask[1]);
7176
7177 for (j = 0; j < 2; ++j)
7178 for (i = 0; i < 32; ++i)
7179 if ((mask[j] >> i) & 1)
7180 sa_size++;
7181
7182 if (TARGET_ABI_OPEN_VMS)
7183 {
7184 /* Start with a stack procedure if we make any calls (REG_RA used), or
7185 need a frame pointer, with a register procedure if we otherwise need
7186 at least a slot, and with a null procedure in other cases. */
7187 if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed)
7188 alpha_procedure_type = PT_STACK;
7189 else if (get_frame_size() != 0)
7190 alpha_procedure_type = PT_REGISTER;
7191 else
7192 alpha_procedure_type = PT_NULL;
7193
7194 /* Don't reserve space for saving FP & RA yet. Do that later after we've
7195 made the final decision on stack procedure vs register procedure. */
7196 if (alpha_procedure_type == PT_STACK)
7197 sa_size -= 2;
7198
7199 /* Decide whether to refer to objects off our PV via FP or PV.
7200 If we need FP for something else or if we receive a nonlocal
7201 goto (which expects PV to contain the value), we must use PV.
7202 Otherwise, start by assuming we can use FP. */
7203
7204 vms_base_regno
7205 = (frame_pointer_needed
7206 || cfun->has_nonlocal_label
7207 || alpha_procedure_type == PT_STACK
7208 || crtl->outgoing_args_size)
7209 ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7210
7211 /* If we want to copy PV into FP, we need to find some register
7212 in which to save FP. */
7213
7214 vms_save_fp_regno = -1;
7215 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7216 for (i = 0; i < 32; i++)
7217 if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i))
7218 vms_save_fp_regno = i;
7219
7220 /* A VMS condition handler requires a stack procedure in our
7221 implementation. (not required by the calling standard). */
7222 if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7223 || cfun->machine->uses_condition_handler)
7224 vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7225 else if (alpha_procedure_type == PT_NULL)
7226 vms_base_regno = REG_PV;
7227
7228 /* Stack unwinding should be done via FP unless we use it for PV. */
7229 vms_unwind_regno = (vms_base_regno == REG_PV
7230 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7231
7232 /* If this is a stack procedure, allow space for saving FP, RA and
7233 a condition handler slot if needed. */
7234 if (alpha_procedure_type == PT_STACK)
7235 sa_size += 2 + cfun->machine->uses_condition_handler;
7236 }
7237 else
7238 {
7239 /* Our size must be even (multiple of 16 bytes). */
7240 if (sa_size & 1)
7241 sa_size++;
7242 }
7243
7244 return sa_size * 8;
7245 }
7246
7247 /* Define the offset between two registers, one to be eliminated,
7248 and the other its replacement, at the start of a routine. */
7249
7250 HOST_WIDE_INT
7251 alpha_initial_elimination_offset (unsigned int from,
7252 unsigned int to ATTRIBUTE_UNUSED)
7253 {
7254 HOST_WIDE_INT ret;
7255
7256 ret = alpha_sa_size ();
7257 ret += ALPHA_ROUND (crtl->outgoing_args_size);
7258
7259 switch (from)
7260 {
7261 case FRAME_POINTER_REGNUM:
7262 break;
7263
7264 case ARG_POINTER_REGNUM:
7265 ret += (ALPHA_ROUND (get_frame_size ()
7266 + crtl->args.pretend_args_size)
7267 - crtl->args.pretend_args_size);
7268 break;
7269
7270 default:
7271 gcc_unreachable ();
7272 }
7273
7274 return ret;
7275 }
7276
7277 #if TARGET_ABI_OPEN_VMS
7278
7279 /* Worker function for TARGET_CAN_ELIMINATE. */
7280
7281 static bool
7282 alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7283 {
7284 /* We need the alpha_procedure_type to decide. Evaluate it now. */
7285 alpha_sa_size ();
7286
7287 switch (alpha_procedure_type)
7288 {
7289 case PT_NULL:
7290 /* NULL procedures have no frame of their own and we only
7291 know how to resolve from the current stack pointer. */
7292 return to == STACK_POINTER_REGNUM;
7293
7294 case PT_REGISTER:
7295 case PT_STACK:
7296 /* We always eliminate except to the stack pointer if there is no
7297 usable frame pointer at hand. */
7298 return (to != STACK_POINTER_REGNUM
7299 || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
7300 }
7301
7302 gcc_unreachable ();
7303 }
7304
7305 /* FROM is to be eliminated for TO. Return the offset so that TO+offset
7306 designates the same location as FROM. */
7307
7308 HOST_WIDE_INT
7309 alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
7310 {
7311 /* The only possible attempts we ever expect are ARG or FRAME_PTR to
7312 HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide
7313 on the proper computations and will need the register save area size
7314 in most cases. */
7315
7316 HOST_WIDE_INT sa_size = alpha_sa_size ();
7317
7318 /* PT_NULL procedures have no frame of their own and we only allow
7319 elimination to the stack pointer. This is the argument pointer and we
7320 resolve the soft frame pointer to that as well. */
7321
7322 if (alpha_procedure_type == PT_NULL)
7323 return 0;
7324
7325 /* For a PT_STACK procedure the frame layout looks as follows
7326
7327 -----> decreasing addresses
7328
7329 < size rounded up to 16 | likewise >
7330 --------------#------------------------------+++--------------+++-------#
7331 incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
7332 --------------#---------------------------------------------------------#
7333 ^ ^ ^ ^
7334 ARG_PTR FRAME_PTR HARD_FRAME_PTR STACK_PTR
7335
7336
7337 PT_REGISTER procedures are similar in that they may have a frame of their
7338 own. They have no regs-sa/pv/outgoing-args area.
7339
7340 We first compute offset to HARD_FRAME_PTR, then add what we need to get
7341 to STACK_PTR if need be. */
7342
7343 {
7344 HOST_WIDE_INT offset;
7345 HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
7346
7347 switch (from)
7348 {
7349 case FRAME_POINTER_REGNUM:
7350 offset = ALPHA_ROUND (sa_size + pv_save_size);
7351 break;
7352 case ARG_POINTER_REGNUM:
7353 offset = (ALPHA_ROUND (sa_size + pv_save_size
7354 + get_frame_size ()
7355 + crtl->args.pretend_args_size)
7356 - crtl->args.pretend_args_size);
7357 break;
7358 default:
7359 gcc_unreachable ();
7360 }
7361
7362 if (to == STACK_POINTER_REGNUM)
7363 offset += ALPHA_ROUND (crtl->outgoing_args_size);
7364
7365 return offset;
7366 }
7367 }
7368
7369 #define COMMON_OBJECT "common_object"
7370
7371 static tree
7372 common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
7373 tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
7374 bool *no_add_attrs ATTRIBUTE_UNUSED)
7375 {
7376 tree decl = *node;
7377 gcc_assert (DECL_P (decl));
7378
7379 DECL_COMMON (decl) = 1;
7380 return NULL_TREE;
7381 }
7382
7383 static const struct attribute_spec vms_attribute_table[] =
7384 {
7385 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7386 affects_type_identity } */
7387 { COMMON_OBJECT, 0, 1, true, false, false, common_object_handler, false },
7388 { NULL, 0, 0, false, false, false, NULL, false }
7389 };
7390
7391 void
7392 vms_output_aligned_decl_common(FILE *file, tree decl, const char *name,
7393 unsigned HOST_WIDE_INT size,
7394 unsigned int align)
7395 {
7396 tree attr = DECL_ATTRIBUTES (decl);
7397 fprintf (file, "%s", COMMON_ASM_OP);
7398 assemble_name (file, name);
7399 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
7400 /* ??? Unlike on OSF/1, the alignment factor is not in log units. */
7401 fprintf (file, ",%u", align / BITS_PER_UNIT);
7402 if (attr)
7403 {
7404 attr = lookup_attribute (COMMON_OBJECT, attr);
7405 if (attr)
7406 fprintf (file, ",%s",
7407 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
7408 }
7409 fputc ('\n', file);
7410 }
7411
7412 #undef COMMON_OBJECT
7413
7414 #endif
7415
7416 static int
7417 find_lo_sum_using_gp (rtx *px, void *data ATTRIBUTE_UNUSED)
7418 {
7419 return GET_CODE (*px) == LO_SUM && XEXP (*px, 0) == pic_offset_table_rtx;
7420 }
7421
7422 int
7423 alpha_find_lo_sum_using_gp (rtx insn)
7424 {
7425 return for_each_rtx (&PATTERN (insn), find_lo_sum_using_gp, NULL) > 0;
7426 }
7427
7428 static int
7429 alpha_does_function_need_gp (void)
7430 {
7431 rtx insn;
7432
7433 /* The GP being variable is an OSF abi thing. */
7434 if (! TARGET_ABI_OSF)
7435 return 0;
7436
7437 /* We need the gp to load the address of __mcount. */
7438 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7439 return 1;
7440
7441 /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */
7442 if (cfun->is_thunk)
7443 return 1;
7444
7445 /* The nonlocal receiver pattern assumes that the gp is valid for
7446 the nested function. Reasonable because it's almost always set
7447 correctly already. For the cases where that's wrong, make sure
7448 the nested function loads its gp on entry. */
7449 if (crtl->has_nonlocal_goto)
7450 return 1;
7451
7452 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7453 Even if we are a static function, we still need to do this in case
7454 our address is taken and passed to something like qsort. */
7455
7456 push_topmost_sequence ();
7457 insn = get_insns ();
7458 pop_topmost_sequence ();
7459
7460 for (; insn; insn = NEXT_INSN (insn))
7461 if (NONDEBUG_INSN_P (insn)
7462 && ! JUMP_TABLE_DATA_P (insn)
7463 && GET_CODE (PATTERN (insn)) != USE
7464 && GET_CODE (PATTERN (insn)) != CLOBBER
7465 && get_attr_usegp (insn))
7466 return 1;
7467
7468 return 0;
7469 }
7470
7471 \f
7472 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7473 sequences. */
7474
7475 static rtx
7476 set_frame_related_p (void)
7477 {
7478 rtx seq = get_insns ();
7479 rtx insn;
7480
7481 end_sequence ();
7482
7483 if (!seq)
7484 return NULL_RTX;
7485
7486 if (INSN_P (seq))
7487 {
7488 insn = seq;
7489 while (insn != NULL_RTX)
7490 {
7491 RTX_FRAME_RELATED_P (insn) = 1;
7492 insn = NEXT_INSN (insn);
7493 }
7494 seq = emit_insn (seq);
7495 }
7496 else
7497 {
7498 seq = emit_insn (seq);
7499 RTX_FRAME_RELATED_P (seq) = 1;
7500 }
7501 return seq;
7502 }
7503
7504 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
7505
7506 /* Generates a store with the proper unwind info attached. VALUE is
7507 stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG
7508 contains SP+FRAME_BIAS, and that is the unwind info that should be
7509 generated. If FRAME_REG != VALUE, then VALUE is being stored on
7510 behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */
7511
7512 static void
7513 emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7514 HOST_WIDE_INT base_ofs, rtx frame_reg)
7515 {
7516 rtx addr, mem, insn;
7517
7518 addr = plus_constant (base_reg, base_ofs);
7519 mem = gen_frame_mem (DImode, addr);
7520
7521 insn = emit_move_insn (mem, value);
7522 RTX_FRAME_RELATED_P (insn) = 1;
7523
7524 if (frame_bias || value != frame_reg)
7525 {
7526 if (frame_bias)
7527 {
7528 addr = plus_constant (stack_pointer_rtx, frame_bias + base_ofs);
7529 mem = gen_rtx_MEM (DImode, addr);
7530 }
7531
7532 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7533 gen_rtx_SET (VOIDmode, mem, frame_reg));
7534 }
7535 }
7536
7537 static void
7538 emit_frame_store (unsigned int regno, rtx base_reg,
7539 HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7540 {
7541 rtx reg = gen_rtx_REG (DImode, regno);
7542 emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7543 }
7544
7545 /* Compute the frame size. SIZE is the size of the "naked" frame
7546 and SA_SIZE is the size of the register save area. */
7547
7548 static HOST_WIDE_INT
7549 compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size)
7550 {
7551 if (TARGET_ABI_OPEN_VMS)
7552 return ALPHA_ROUND (sa_size
7553 + (alpha_procedure_type == PT_STACK ? 8 : 0)
7554 + size
7555 + crtl->args.pretend_args_size);
7556 else
7557 return ALPHA_ROUND (crtl->outgoing_args_size)
7558 + sa_size
7559 + ALPHA_ROUND (size
7560 + crtl->args.pretend_args_size);
7561 }
7562
7563 /* Write function prologue. */
7564
7565 /* On vms we have two kinds of functions:
7566
7567 - stack frame (PROC_STACK)
7568 these are 'normal' functions with local vars and which are
7569 calling other functions
7570 - register frame (PROC_REGISTER)
7571 keeps all data in registers, needs no stack
7572
7573 We must pass this to the assembler so it can generate the
7574 proper pdsc (procedure descriptor)
7575 This is done with the '.pdesc' command.
7576
7577 On not-vms, we don't really differentiate between the two, as we can
7578 simply allocate stack without saving registers. */
7579
7580 void
7581 alpha_expand_prologue (void)
7582 {
7583 /* Registers to save. */
7584 unsigned long imask = 0;
7585 unsigned long fmask = 0;
7586 /* Stack space needed for pushing registers clobbered by us. */
7587 HOST_WIDE_INT sa_size, sa_bias;
7588 /* Complete stack size needed. */
7589 HOST_WIDE_INT frame_size;
7590 /* Probed stack size; it additionally includes the size of
7591 the "reserve region" if any. */
7592 HOST_WIDE_INT probed_size;
7593 /* Offset from base reg to register save area. */
7594 HOST_WIDE_INT reg_offset;
7595 rtx sa_reg;
7596 int i;
7597
7598 sa_size = alpha_sa_size ();
7599 frame_size = compute_frame_size (get_frame_size (), sa_size);
7600
7601 if (flag_stack_usage_info)
7602 current_function_static_stack_size = frame_size;
7603
7604 if (TARGET_ABI_OPEN_VMS)
7605 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7606 else
7607 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7608
7609 alpha_sa_mask (&imask, &fmask);
7610
7611 /* Emit an insn to reload GP, if needed. */
7612 if (TARGET_ABI_OSF)
7613 {
7614 alpha_function_needs_gp = alpha_does_function_need_gp ();
7615 if (alpha_function_needs_gp)
7616 emit_insn (gen_prologue_ldgp ());
7617 }
7618
7619 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7620 the call to mcount ourselves, rather than having the linker do it
7621 magically in response to -pg. Since _mcount has special linkage,
7622 don't represent the call as a call. */
7623 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7624 emit_insn (gen_prologue_mcount ());
7625
7626 /* Adjust the stack by the frame size. If the frame size is > 4096
7627 bytes, we need to be sure we probe somewhere in the first and last
7628 4096 bytes (we can probably get away without the latter test) and
7629 every 8192 bytes in between. If the frame size is > 32768, we
7630 do this in a loop. Otherwise, we generate the explicit probe
7631 instructions.
7632
7633 Note that we are only allowed to adjust sp once in the prologue. */
7634
7635 probed_size = frame_size;
7636 if (flag_stack_check)
7637 probed_size += STACK_CHECK_PROTECT;
7638
7639 if (probed_size <= 32768)
7640 {
7641 if (probed_size > 4096)
7642 {
7643 int probed;
7644
7645 for (probed = 4096; probed < probed_size; probed += 8192)
7646 emit_insn (gen_probe_stack (GEN_INT (-probed)));
7647
7648 /* We only have to do this probe if we aren't saving registers or
7649 if we are probing beyond the frame because of -fstack-check. */
7650 if ((sa_size == 0 && probed_size > probed - 4096)
7651 || flag_stack_check)
7652 emit_insn (gen_probe_stack (GEN_INT (-probed_size)));
7653 }
7654
7655 if (frame_size != 0)
7656 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7657 GEN_INT (-frame_size))));
7658 }
7659 else
7660 {
7661 /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7662 number of 8192 byte blocks to probe. We then probe each block
7663 in the loop and then set SP to the proper location. If the
7664 amount remaining is > 4096, we have to do one more probe if we
7665 are not saving any registers or if we are probing beyond the
7666 frame because of -fstack-check. */
7667
7668 HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
7669 HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
7670 rtx ptr = gen_rtx_REG (DImode, 22);
7671 rtx count = gen_rtx_REG (DImode, 23);
7672 rtx seq;
7673
7674 emit_move_insn (count, GEN_INT (blocks));
7675 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
7676
7677 /* Because of the difficulty in emitting a new basic block this
7678 late in the compilation, generate the loop as a single insn. */
7679 emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7680
7681 if ((leftover > 4096 && sa_size == 0) || flag_stack_check)
7682 {
7683 rtx last = gen_rtx_MEM (DImode, plus_constant (ptr, -leftover));
7684 MEM_VOLATILE_P (last) = 1;
7685 emit_move_insn (last, const0_rtx);
7686 }
7687
7688 if (flag_stack_check)
7689 {
7690 /* If -fstack-check is specified we have to load the entire
7691 constant into a register and subtract from the sp in one go,
7692 because the probed stack size is not equal to the frame size. */
7693 HOST_WIDE_INT lo, hi;
7694 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7695 hi = frame_size - lo;
7696
7697 emit_move_insn (ptr, GEN_INT (hi));
7698 emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7699 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7700 ptr));
7701 }
7702 else
7703 {
7704 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7705 GEN_INT (-leftover)));
7706 }
7707
7708 /* This alternative is special, because the DWARF code cannot
7709 possibly intuit through the loop above. So we invent this
7710 note it looks at instead. */
7711 RTX_FRAME_RELATED_P (seq) = 1;
7712 add_reg_note (seq, REG_FRAME_RELATED_EXPR,
7713 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
7714 plus_constant (stack_pointer_rtx,
7715 -frame_size)));
7716 }
7717
7718 /* Cope with very large offsets to the register save area. */
7719 sa_bias = 0;
7720 sa_reg = stack_pointer_rtx;
7721 if (reg_offset + sa_size > 0x8000)
7722 {
7723 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7724 rtx sa_bias_rtx;
7725
7726 if (low + sa_size <= 0x8000)
7727 sa_bias = reg_offset - low, reg_offset = low;
7728 else
7729 sa_bias = reg_offset, reg_offset = 0;
7730
7731 sa_reg = gen_rtx_REG (DImode, 24);
7732 sa_bias_rtx = GEN_INT (sa_bias);
7733
7734 if (add_operand (sa_bias_rtx, DImode))
7735 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7736 else
7737 {
7738 emit_move_insn (sa_reg, sa_bias_rtx);
7739 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7740 }
7741 }
7742
7743 /* Save regs in stack order. Beginning with VMS PV. */
7744 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7745 emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7746
7747 /* Save register RA next. */
7748 if (imask & (1UL << REG_RA))
7749 {
7750 emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset);
7751 imask &= ~(1UL << REG_RA);
7752 reg_offset += 8;
7753 }
7754
7755 /* Now save any other registers required to be saved. */
7756 for (i = 0; i < 31; i++)
7757 if (imask & (1UL << i))
7758 {
7759 emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7760 reg_offset += 8;
7761 }
7762
7763 for (i = 0; i < 31; i++)
7764 if (fmask & (1UL << i))
7765 {
7766 emit_frame_store (i+32, sa_reg, sa_bias, reg_offset);
7767 reg_offset += 8;
7768 }
7769
7770 if (TARGET_ABI_OPEN_VMS)
7771 {
7772 /* Register frame procedures save the fp. */
7773 if (alpha_procedure_type == PT_REGISTER)
7774 {
7775 rtx insn = emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7776 hard_frame_pointer_rtx);
7777 add_reg_note (insn, REG_CFA_REGISTER, NULL);
7778 RTX_FRAME_RELATED_P (insn) = 1;
7779 }
7780
7781 if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7782 emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7783 gen_rtx_REG (DImode, REG_PV)));
7784
7785 if (alpha_procedure_type != PT_NULL
7786 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7787 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7788
7789 /* If we have to allocate space for outgoing args, do it now. */
7790 if (crtl->outgoing_args_size != 0)
7791 {
7792 rtx seq
7793 = emit_move_insn (stack_pointer_rtx,
7794 plus_constant
7795 (hard_frame_pointer_rtx,
7796 - (ALPHA_ROUND
7797 (crtl->outgoing_args_size))));
7798
7799 /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7800 if ! frame_pointer_needed. Setting the bit will change the CFA
7801 computation rule to use sp again, which would be wrong if we had
7802 frame_pointer_needed, as this means sp might move unpredictably
7803 later on.
7804
7805 Also, note that
7806 frame_pointer_needed
7807 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7808 and
7809 crtl->outgoing_args_size != 0
7810 => alpha_procedure_type != PT_NULL,
7811
7812 so when we are not setting the bit here, we are guaranteed to
7813 have emitted an FRP frame pointer update just before. */
7814 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
7815 }
7816 }
7817 else
7818 {
7819 /* If we need a frame pointer, set it from the stack pointer. */
7820 if (frame_pointer_needed)
7821 {
7822 if (TARGET_CAN_FAULT_IN_PROLOGUE)
7823 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7824 else
7825 /* This must always be the last instruction in the
7826 prologue, thus we emit a special move + clobber. */
7827 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
7828 stack_pointer_rtx, sa_reg)));
7829 }
7830 }
7831
7832 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
7833 the prologue, for exception handling reasons, we cannot do this for
7834 any insn that might fault. We could prevent this for mems with a
7835 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
7836 have to prevent all such scheduling with a blockage.
7837
7838 Linux, on the other hand, never bothered to implement OSF/1's
7839 exception handling, and so doesn't care about such things. Anyone
7840 planning to use dwarf2 frame-unwind info can also omit the blockage. */
7841
7842 if (! TARGET_CAN_FAULT_IN_PROLOGUE)
7843 emit_insn (gen_blockage ());
7844 }
7845
7846 /* Count the number of .file directives, so that .loc is up to date. */
7847 int num_source_filenames = 0;
7848
7849 /* Output the textual info surrounding the prologue. */
7850
7851 void
7852 alpha_start_function (FILE *file, const char *fnname,
7853 tree decl ATTRIBUTE_UNUSED)
7854 {
7855 unsigned long imask = 0;
7856 unsigned long fmask = 0;
7857 /* Stack space needed for pushing registers clobbered by us. */
7858 HOST_WIDE_INT sa_size;
7859 /* Complete stack size needed. */
7860 unsigned HOST_WIDE_INT frame_size;
7861 /* The maximum debuggable frame size (512 Kbytes using Tru64 as). */
7862 unsigned HOST_WIDE_INT max_frame_size = TARGET_ABI_OSF && !TARGET_GAS
7863 ? 524288
7864 : 1UL << 31;
7865 /* Offset from base reg to register save area. */
7866 HOST_WIDE_INT reg_offset;
7867 char *entry_label = (char *) alloca (strlen (fnname) + 6);
7868 char *tramp_label = (char *) alloca (strlen (fnname) + 6);
7869 int i;
7870
7871 #if TARGET_ABI_OPEN_VMS
7872 if (vms_debug_main
7873 && strncmp (vms_debug_main, fnname, strlen (vms_debug_main)) == 0)
7874 {
7875 targetm.asm_out.globalize_label (asm_out_file, VMS_DEBUG_MAIN_POINTER);
7876 ASM_OUTPUT_DEF (asm_out_file, VMS_DEBUG_MAIN_POINTER, fnname);
7877 switch_to_section (text_section);
7878 vms_debug_main = NULL;
7879 }
7880 #endif
7881
7882 alpha_fnname = fnname;
7883 sa_size = alpha_sa_size ();
7884 frame_size = compute_frame_size (get_frame_size (), sa_size);
7885
7886 if (TARGET_ABI_OPEN_VMS)
7887 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7888 else
7889 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7890
7891 alpha_sa_mask (&imask, &fmask);
7892
7893 /* Ecoff can handle multiple .file directives, so put out file and lineno.
7894 We have to do that before the .ent directive as we cannot switch
7895 files within procedures with native ecoff because line numbers are
7896 linked to procedure descriptors.
7897 Outputting the lineno helps debugging of one line functions as they
7898 would otherwise get no line number at all. Please note that we would
7899 like to put out last_linenum from final.c, but it is not accessible. */
7900
7901 if (write_symbols == SDB_DEBUG)
7902 {
7903 #ifdef ASM_OUTPUT_SOURCE_FILENAME
7904 ASM_OUTPUT_SOURCE_FILENAME (file,
7905 DECL_SOURCE_FILE (current_function_decl));
7906 #endif
7907 #ifdef SDB_OUTPUT_SOURCE_LINE
7908 if (debug_info_level != DINFO_LEVEL_TERSE)
7909 SDB_OUTPUT_SOURCE_LINE (file,
7910 DECL_SOURCE_LINE (current_function_decl));
7911 #endif
7912 }
7913
7914 /* Issue function start and label. */
7915 if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
7916 {
7917 fputs ("\t.ent ", file);
7918 assemble_name (file, fnname);
7919 putc ('\n', file);
7920
7921 /* If the function needs GP, we'll write the "..ng" label there.
7922 Otherwise, do it here. */
7923 if (TARGET_ABI_OSF
7924 && ! alpha_function_needs_gp
7925 && ! cfun->is_thunk)
7926 {
7927 putc ('$', file);
7928 assemble_name (file, fnname);
7929 fputs ("..ng:\n", file);
7930 }
7931 }
7932 /* Nested functions on VMS that are potentially called via trampoline
7933 get a special transfer entry point that loads the called functions
7934 procedure descriptor and static chain. */
7935 if (TARGET_ABI_OPEN_VMS
7936 && !TREE_PUBLIC (decl)
7937 && DECL_CONTEXT (decl)
7938 && !TYPE_P (DECL_CONTEXT (decl))
7939 && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
7940 {
7941 strcpy (tramp_label, fnname);
7942 strcat (tramp_label, "..tr");
7943 ASM_OUTPUT_LABEL (file, tramp_label);
7944 fprintf (file, "\tldq $1,24($27)\n");
7945 fprintf (file, "\tldq $27,16($27)\n");
7946 }
7947
7948 strcpy (entry_label, fnname);
7949 if (TARGET_ABI_OPEN_VMS)
7950 strcat (entry_label, "..en");
7951
7952 ASM_OUTPUT_LABEL (file, entry_label);
7953 inside_function = TRUE;
7954
7955 if (TARGET_ABI_OPEN_VMS)
7956 fprintf (file, "\t.base $%d\n", vms_base_regno);
7957
7958 if (TARGET_ABI_OSF
7959 && TARGET_IEEE_CONFORMANT
7960 && !flag_inhibit_size_directive)
7961 {
7962 /* Set flags in procedure descriptor to request IEEE-conformant
7963 math-library routines. The value we set it to is PDSC_EXC_IEEE
7964 (/usr/include/pdsc.h). */
7965 fputs ("\t.eflag 48\n", file);
7966 }
7967
7968 /* Set up offsets to alpha virtual arg/local debugging pointer. */
7969 alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
7970 alpha_arg_offset = -frame_size + 48;
7971
7972 /* Describe our frame. If the frame size is larger than an integer,
7973 print it as zero to avoid an assembler error. We won't be
7974 properly describing such a frame, but that's the best we can do. */
7975 if (TARGET_ABI_OPEN_VMS)
7976 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
7977 HOST_WIDE_INT_PRINT_DEC "\n",
7978 vms_unwind_regno,
7979 frame_size >= (1UL << 31) ? 0 : frame_size,
7980 reg_offset);
7981 else if (!flag_inhibit_size_directive)
7982 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
7983 (frame_pointer_needed
7984 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
7985 frame_size >= max_frame_size ? 0 : frame_size,
7986 crtl->args.pretend_args_size);
7987
7988 /* Describe which registers were spilled. */
7989 if (TARGET_ABI_OPEN_VMS)
7990 {
7991 if (imask)
7992 /* ??? Does VMS care if mask contains ra? The old code didn't
7993 set it, so I don't here. */
7994 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
7995 if (fmask)
7996 fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
7997 if (alpha_procedure_type == PT_REGISTER)
7998 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
7999 }
8000 else if (!flag_inhibit_size_directive)
8001 {
8002 if (imask)
8003 {
8004 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
8005 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8006
8007 for (i = 0; i < 32; ++i)
8008 if (imask & (1UL << i))
8009 reg_offset += 8;
8010 }
8011
8012 if (fmask)
8013 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
8014 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8015 }
8016
8017 #if TARGET_ABI_OPEN_VMS
8018 /* If a user condition handler has been installed at some point, emit
8019 the procedure descriptor bits to point the Condition Handling Facility
8020 at the indirection wrapper, and state the fp offset at which the user
8021 handler may be found. */
8022 if (cfun->machine->uses_condition_handler)
8023 {
8024 fprintf (file, "\t.handler __gcc_shell_handler\n");
8025 fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
8026 }
8027
8028 #ifdef TARGET_VMS_CRASH_DEBUG
8029 /* Support of minimal traceback info. */
8030 switch_to_section (readonly_data_section);
8031 fprintf (file, "\t.align 3\n");
8032 assemble_name (file, fnname); fputs ("..na:\n", file);
8033 fputs ("\t.ascii \"", file);
8034 assemble_name (file, fnname);
8035 fputs ("\\0\"\n", file);
8036 switch_to_section (text_section);
8037 #endif
8038 #endif /* TARGET_ABI_OPEN_VMS */
8039 }
8040
8041 /* Emit the .prologue note at the scheduled end of the prologue. */
8042
8043 static void
8044 alpha_output_function_end_prologue (FILE *file)
8045 {
8046 if (TARGET_ABI_OPEN_VMS)
8047 fputs ("\t.prologue\n", file);
8048 else if (!flag_inhibit_size_directive)
8049 fprintf (file, "\t.prologue %d\n",
8050 alpha_function_needs_gp || cfun->is_thunk);
8051 }
8052
8053 /* Write function epilogue. */
8054
8055 void
8056 alpha_expand_epilogue (void)
8057 {
8058 /* Registers to save. */
8059 unsigned long imask = 0;
8060 unsigned long fmask = 0;
8061 /* Stack space needed for pushing registers clobbered by us. */
8062 HOST_WIDE_INT sa_size;
8063 /* Complete stack size needed. */
8064 HOST_WIDE_INT frame_size;
8065 /* Offset from base reg to register save area. */
8066 HOST_WIDE_INT reg_offset;
8067 int fp_is_frame_pointer, fp_offset;
8068 rtx sa_reg, sa_reg_exp = NULL;
8069 rtx sp_adj1, sp_adj2, mem, reg, insn;
8070 rtx eh_ofs;
8071 rtx cfa_restores = NULL_RTX;
8072 int i;
8073
8074 sa_size = alpha_sa_size ();
8075 frame_size = compute_frame_size (get_frame_size (), sa_size);
8076
8077 if (TARGET_ABI_OPEN_VMS)
8078 {
8079 if (alpha_procedure_type == PT_STACK)
8080 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8081 else
8082 reg_offset = 0;
8083 }
8084 else
8085 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8086
8087 alpha_sa_mask (&imask, &fmask);
8088
8089 fp_is_frame_pointer
8090 = (TARGET_ABI_OPEN_VMS
8091 ? alpha_procedure_type == PT_STACK
8092 : frame_pointer_needed);
8093 fp_offset = 0;
8094 sa_reg = stack_pointer_rtx;
8095
8096 if (crtl->calls_eh_return)
8097 eh_ofs = EH_RETURN_STACKADJ_RTX;
8098 else
8099 eh_ofs = NULL_RTX;
8100
8101 if (sa_size)
8102 {
8103 /* If we have a frame pointer, restore SP from it. */
8104 if (TARGET_ABI_OPEN_VMS
8105 ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8106 : frame_pointer_needed)
8107 emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
8108
8109 /* Cope with very large offsets to the register save area. */
8110 if (reg_offset + sa_size > 0x8000)
8111 {
8112 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8113 HOST_WIDE_INT bias;
8114
8115 if (low + sa_size <= 0x8000)
8116 bias = reg_offset - low, reg_offset = low;
8117 else
8118 bias = reg_offset, reg_offset = 0;
8119
8120 sa_reg = gen_rtx_REG (DImode, 22);
8121 sa_reg_exp = plus_constant (stack_pointer_rtx, bias);
8122
8123 emit_move_insn (sa_reg, sa_reg_exp);
8124 }
8125
8126 /* Restore registers in order, excepting a true frame pointer. */
8127
8128 mem = gen_frame_mem (DImode, plus_constant (sa_reg, reg_offset));
8129 reg = gen_rtx_REG (DImode, REG_RA);
8130 emit_move_insn (reg, mem);
8131 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8132
8133 reg_offset += 8;
8134 imask &= ~(1UL << REG_RA);
8135
8136 for (i = 0; i < 31; ++i)
8137 if (imask & (1UL << i))
8138 {
8139 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8140 fp_offset = reg_offset;
8141 else
8142 {
8143 mem = gen_frame_mem (DImode,
8144 plus_constant (sa_reg, reg_offset));
8145 reg = gen_rtx_REG (DImode, i);
8146 emit_move_insn (reg, mem);
8147 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
8148 cfa_restores);
8149 }
8150 reg_offset += 8;
8151 }
8152
8153 for (i = 0; i < 31; ++i)
8154 if (fmask & (1UL << i))
8155 {
8156 mem = gen_frame_mem (DFmode, plus_constant (sa_reg, reg_offset));
8157 reg = gen_rtx_REG (DFmode, i+32);
8158 emit_move_insn (reg, mem);
8159 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8160 reg_offset += 8;
8161 }
8162 }
8163
8164 if (frame_size || eh_ofs)
8165 {
8166 sp_adj1 = stack_pointer_rtx;
8167
8168 if (eh_ofs)
8169 {
8170 sp_adj1 = gen_rtx_REG (DImode, 23);
8171 emit_move_insn (sp_adj1,
8172 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8173 }
8174
8175 /* If the stack size is large, begin computation into a temporary
8176 register so as not to interfere with a potential fp restore,
8177 which must be consecutive with an SP restore. */
8178 if (frame_size < 32768 && !cfun->calls_alloca)
8179 sp_adj2 = GEN_INT (frame_size);
8180 else if (frame_size < 0x40007fffL)
8181 {
8182 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8183
8184 sp_adj2 = plus_constant (sp_adj1, frame_size - low);
8185 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8186 sp_adj1 = sa_reg;
8187 else
8188 {
8189 sp_adj1 = gen_rtx_REG (DImode, 23);
8190 emit_move_insn (sp_adj1, sp_adj2);
8191 }
8192 sp_adj2 = GEN_INT (low);
8193 }
8194 else
8195 {
8196 rtx tmp = gen_rtx_REG (DImode, 23);
8197 sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
8198 if (!sp_adj2)
8199 {
8200 /* We can't drop new things to memory this late, afaik,
8201 so build it up by pieces. */
8202 sp_adj2 = alpha_emit_set_long_const (tmp, frame_size,
8203 -(frame_size < 0));
8204 gcc_assert (sp_adj2);
8205 }
8206 }
8207
8208 /* From now on, things must be in order. So emit blockages. */
8209
8210 /* Restore the frame pointer. */
8211 if (fp_is_frame_pointer)
8212 {
8213 emit_insn (gen_blockage ());
8214 mem = gen_frame_mem (DImode, plus_constant (sa_reg, fp_offset));
8215 emit_move_insn (hard_frame_pointer_rtx, mem);
8216 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8217 hard_frame_pointer_rtx, cfa_restores);
8218 }
8219 else if (TARGET_ABI_OPEN_VMS)
8220 {
8221 emit_insn (gen_blockage ());
8222 emit_move_insn (hard_frame_pointer_rtx,
8223 gen_rtx_REG (DImode, vms_save_fp_regno));
8224 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8225 hard_frame_pointer_rtx, cfa_restores);
8226 }
8227
8228 /* Restore the stack pointer. */
8229 emit_insn (gen_blockage ());
8230 if (sp_adj2 == const0_rtx)
8231 insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
8232 else
8233 insn = emit_move_insn (stack_pointer_rtx,
8234 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
8235 REG_NOTES (insn) = cfa_restores;
8236 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
8237 RTX_FRAME_RELATED_P (insn) = 1;
8238 }
8239 else
8240 {
8241 gcc_assert (cfa_restores == NULL);
8242
8243 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8244 {
8245 emit_insn (gen_blockage ());
8246 insn = emit_move_insn (hard_frame_pointer_rtx,
8247 gen_rtx_REG (DImode, vms_save_fp_regno));
8248 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8249 RTX_FRAME_RELATED_P (insn) = 1;
8250 }
8251 }
8252 }
8253 \f
8254 /* Output the rest of the textual info surrounding the epilogue. */
8255
8256 void
8257 alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8258 {
8259 rtx insn;
8260
8261 /* We output a nop after noreturn calls at the very end of the function to
8262 ensure that the return address always remains in the caller's code range,
8263 as not doing so might confuse unwinding engines. */
8264 insn = get_last_insn ();
8265 if (!INSN_P (insn))
8266 insn = prev_active_insn (insn);
8267 if (insn && CALL_P (insn))
8268 output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
8269
8270 #if TARGET_ABI_OPEN_VMS
8271 /* Write the linkage entries. */
8272 alpha_write_linkage (file, fnname);
8273 #endif
8274
8275 /* End the function. */
8276 if (TARGET_ABI_OPEN_VMS
8277 || !flag_inhibit_size_directive)
8278 {
8279 fputs ("\t.end ", file);
8280 assemble_name (file, fnname);
8281 putc ('\n', file);
8282 }
8283 inside_function = FALSE;
8284 }
8285
8286 #if TARGET_ABI_OSF
8287 /* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8288
8289 In order to avoid the hordes of differences between generated code
8290 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8291 lots of code loading up large constants, generate rtl and emit it
8292 instead of going straight to text.
8293
8294 Not sure why this idea hasn't been explored before... */
8295
8296 static void
8297 alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8298 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8299 tree function)
8300 {
8301 HOST_WIDE_INT hi, lo;
8302 rtx this_rtx, insn, funexp;
8303
8304 /* We always require a valid GP. */
8305 emit_insn (gen_prologue_ldgp ());
8306 emit_note (NOTE_INSN_PROLOGUE_END);
8307
8308 /* Find the "this" pointer. If the function returns a structure,
8309 the structure return pointer is in $16. */
8310 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8311 this_rtx = gen_rtx_REG (Pmode, 17);
8312 else
8313 this_rtx = gen_rtx_REG (Pmode, 16);
8314
8315 /* Add DELTA. When possible we use ldah+lda. Otherwise load the
8316 entire constant for the add. */
8317 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8318 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8319 if (hi + lo == delta)
8320 {
8321 if (hi)
8322 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
8323 if (lo)
8324 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
8325 }
8326 else
8327 {
8328 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0),
8329 delta, -(delta < 0));
8330 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8331 }
8332
8333 /* Add a delta stored in the vtable at VCALL_OFFSET. */
8334 if (vcall_offset)
8335 {
8336 rtx tmp, tmp2;
8337
8338 tmp = gen_rtx_REG (Pmode, 0);
8339 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
8340
8341 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8342 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8343 if (hi + lo == vcall_offset)
8344 {
8345 if (hi)
8346 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8347 }
8348 else
8349 {
8350 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8351 vcall_offset, -(vcall_offset < 0));
8352 emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8353 lo = 0;
8354 }
8355 if (lo)
8356 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8357 else
8358 tmp2 = tmp;
8359 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8360
8361 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8362 }
8363
8364 /* Generate a tail call to the target function. */
8365 if (! TREE_USED (function))
8366 {
8367 assemble_external (function);
8368 TREE_USED (function) = 1;
8369 }
8370 funexp = XEXP (DECL_RTL (function), 0);
8371 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8372 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8373 SIBLING_CALL_P (insn) = 1;
8374
8375 /* Run just enough of rest_of_compilation to get the insns emitted.
8376 There's not really enough bulk here to make other passes such as
8377 instruction scheduling worth while. Note that use_thunk calls
8378 assemble_start_function and assemble_end_function. */
8379 insn = get_insns ();
8380 insn_locators_alloc ();
8381 shorten_branches (insn);
8382 final_start_function (insn, file, 1);
8383 final (insn, file, 1);
8384 final_end_function ();
8385 }
8386 #endif /* TARGET_ABI_OSF */
8387 \f
8388 /* Debugging support. */
8389
8390 #include "gstab.h"
8391
8392 /* Count the number of sdb related labels are generated (to find block
8393 start and end boundaries). */
8394
8395 int sdb_label_count = 0;
8396
8397 /* Name of the file containing the current function. */
8398
8399 static const char *current_function_file = "";
8400
8401 /* Offsets to alpha virtual arg/local debugging pointers. */
8402
8403 long alpha_arg_offset;
8404 long alpha_auto_offset;
8405 \f
8406 /* Emit a new filename to a stream. */
8407
8408 void
8409 alpha_output_filename (FILE *stream, const char *name)
8410 {
8411 static int first_time = TRUE;
8412
8413 if (first_time)
8414 {
8415 first_time = FALSE;
8416 ++num_source_filenames;
8417 current_function_file = name;
8418 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8419 output_quoted_string (stream, name);
8420 fprintf (stream, "\n");
8421 if (!TARGET_GAS && write_symbols == DBX_DEBUG)
8422 fprintf (stream, "\t#@stabs\n");
8423 }
8424
8425 else if (write_symbols == DBX_DEBUG)
8426 /* dbxout.c will emit an appropriate .stabs directive. */
8427 return;
8428
8429 else if (name != current_function_file
8430 && strcmp (name, current_function_file) != 0)
8431 {
8432 if (inside_function && ! TARGET_GAS)
8433 fprintf (stream, "\t#.file\t%d ", num_source_filenames);
8434 else
8435 {
8436 ++num_source_filenames;
8437 current_function_file = name;
8438 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8439 }
8440
8441 output_quoted_string (stream, name);
8442 fprintf (stream, "\n");
8443 }
8444 }
8445 \f
8446 /* Structure to show the current status of registers and memory. */
8447
8448 struct shadow_summary
8449 {
8450 struct {
8451 unsigned int i : 31; /* Mask of int regs */
8452 unsigned int fp : 31; /* Mask of fp regs */
8453 unsigned int mem : 1; /* mem == imem | fpmem */
8454 } used, defd;
8455 };
8456
8457 /* Summary the effects of expression X on the machine. Update SUM, a pointer
8458 to the summary structure. SET is nonzero if the insn is setting the
8459 object, otherwise zero. */
8460
8461 static void
8462 summarize_insn (rtx x, struct shadow_summary *sum, int set)
8463 {
8464 const char *format_ptr;
8465 int i, j;
8466
8467 if (x == 0)
8468 return;
8469
8470 switch (GET_CODE (x))
8471 {
8472 /* ??? Note that this case would be incorrect if the Alpha had a
8473 ZERO_EXTRACT in SET_DEST. */
8474 case SET:
8475 summarize_insn (SET_SRC (x), sum, 0);
8476 summarize_insn (SET_DEST (x), sum, 1);
8477 break;
8478
8479 case CLOBBER:
8480 summarize_insn (XEXP (x, 0), sum, 1);
8481 break;
8482
8483 case USE:
8484 summarize_insn (XEXP (x, 0), sum, 0);
8485 break;
8486
8487 case ASM_OPERANDS:
8488 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8489 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8490 break;
8491
8492 case PARALLEL:
8493 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8494 summarize_insn (XVECEXP (x, 0, i), sum, 0);
8495 break;
8496
8497 case SUBREG:
8498 summarize_insn (SUBREG_REG (x), sum, 0);
8499 break;
8500
8501 case REG:
8502 {
8503 int regno = REGNO (x);
8504 unsigned long mask = ((unsigned long) 1) << (regno % 32);
8505
8506 if (regno == 31 || regno == 63)
8507 break;
8508
8509 if (set)
8510 {
8511 if (regno < 32)
8512 sum->defd.i |= mask;
8513 else
8514 sum->defd.fp |= mask;
8515 }
8516 else
8517 {
8518 if (regno < 32)
8519 sum->used.i |= mask;
8520 else
8521 sum->used.fp |= mask;
8522 }
8523 }
8524 break;
8525
8526 case MEM:
8527 if (set)
8528 sum->defd.mem = 1;
8529 else
8530 sum->used.mem = 1;
8531
8532 /* Find the regs used in memory address computation: */
8533 summarize_insn (XEXP (x, 0), sum, 0);
8534 break;
8535
8536 case CONST_INT: case CONST_DOUBLE:
8537 case SYMBOL_REF: case LABEL_REF: case CONST:
8538 case SCRATCH: case ASM_INPUT:
8539 break;
8540
8541 /* Handle common unary and binary ops for efficiency. */
8542 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
8543 case MOD: case UDIV: case UMOD: case AND: case IOR:
8544 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
8545 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
8546 case NE: case EQ: case GE: case GT: case LE:
8547 case LT: case GEU: case GTU: case LEU: case LTU:
8548 summarize_insn (XEXP (x, 0), sum, 0);
8549 summarize_insn (XEXP (x, 1), sum, 0);
8550 break;
8551
8552 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
8553 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
8554 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
8555 case SQRT: case FFS:
8556 summarize_insn (XEXP (x, 0), sum, 0);
8557 break;
8558
8559 default:
8560 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8561 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8562 switch (format_ptr[i])
8563 {
8564 case 'e':
8565 summarize_insn (XEXP (x, i), sum, 0);
8566 break;
8567
8568 case 'E':
8569 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8570 summarize_insn (XVECEXP (x, i, j), sum, 0);
8571 break;
8572
8573 case 'i':
8574 break;
8575
8576 default:
8577 gcc_unreachable ();
8578 }
8579 }
8580 }
8581
8582 /* Ensure a sufficient number of `trapb' insns are in the code when
8583 the user requests code with a trap precision of functions or
8584 instructions.
8585
8586 In naive mode, when the user requests a trap-precision of
8587 "instruction", a trapb is needed after every instruction that may
8588 generate a trap. This ensures that the code is resumption safe but
8589 it is also slow.
8590
8591 When optimizations are turned on, we delay issuing a trapb as long
8592 as possible. In this context, a trap shadow is the sequence of
8593 instructions that starts with a (potentially) trap generating
8594 instruction and extends to the next trapb or call_pal instruction
8595 (but GCC never generates call_pal by itself). We can delay (and
8596 therefore sometimes omit) a trapb subject to the following
8597 conditions:
8598
8599 (a) On entry to the trap shadow, if any Alpha register or memory
8600 location contains a value that is used as an operand value by some
8601 instruction in the trap shadow (live on entry), then no instruction
8602 in the trap shadow may modify the register or memory location.
8603
8604 (b) Within the trap shadow, the computation of the base register
8605 for a memory load or store instruction may not involve using the
8606 result of an instruction that might generate an UNPREDICTABLE
8607 result.
8608
8609 (c) Within the trap shadow, no register may be used more than once
8610 as a destination register. (This is to make life easier for the
8611 trap-handler.)
8612
8613 (d) The trap shadow may not include any branch instructions. */
8614
8615 static void
8616 alpha_handle_trap_shadows (void)
8617 {
8618 struct shadow_summary shadow;
8619 int trap_pending, exception_nesting;
8620 rtx i, n;
8621
8622 trap_pending = 0;
8623 exception_nesting = 0;
8624 shadow.used.i = 0;
8625 shadow.used.fp = 0;
8626 shadow.used.mem = 0;
8627 shadow.defd = shadow.used;
8628
8629 for (i = get_insns (); i ; i = NEXT_INSN (i))
8630 {
8631 if (NOTE_P (i))
8632 {
8633 switch (NOTE_KIND (i))
8634 {
8635 case NOTE_INSN_EH_REGION_BEG:
8636 exception_nesting++;
8637 if (trap_pending)
8638 goto close_shadow;
8639 break;
8640
8641 case NOTE_INSN_EH_REGION_END:
8642 exception_nesting--;
8643 if (trap_pending)
8644 goto close_shadow;
8645 break;
8646
8647 case NOTE_INSN_EPILOGUE_BEG:
8648 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8649 goto close_shadow;
8650 break;
8651 }
8652 }
8653 else if (trap_pending)
8654 {
8655 if (alpha_tp == ALPHA_TP_FUNC)
8656 {
8657 if (JUMP_P (i)
8658 && GET_CODE (PATTERN (i)) == RETURN)
8659 goto close_shadow;
8660 }
8661 else if (alpha_tp == ALPHA_TP_INSN)
8662 {
8663 if (optimize > 0)
8664 {
8665 struct shadow_summary sum;
8666
8667 sum.used.i = 0;
8668 sum.used.fp = 0;
8669 sum.used.mem = 0;
8670 sum.defd = sum.used;
8671
8672 switch (GET_CODE (i))
8673 {
8674 case INSN:
8675 /* Annoyingly, get_attr_trap will die on these. */
8676 if (GET_CODE (PATTERN (i)) == USE
8677 || GET_CODE (PATTERN (i)) == CLOBBER)
8678 break;
8679
8680 summarize_insn (PATTERN (i), &sum, 0);
8681
8682 if ((sum.defd.i & shadow.defd.i)
8683 || (sum.defd.fp & shadow.defd.fp))
8684 {
8685 /* (c) would be violated */
8686 goto close_shadow;
8687 }
8688
8689 /* Combine shadow with summary of current insn: */
8690 shadow.used.i |= sum.used.i;
8691 shadow.used.fp |= sum.used.fp;
8692 shadow.used.mem |= sum.used.mem;
8693 shadow.defd.i |= sum.defd.i;
8694 shadow.defd.fp |= sum.defd.fp;
8695 shadow.defd.mem |= sum.defd.mem;
8696
8697 if ((sum.defd.i & shadow.used.i)
8698 || (sum.defd.fp & shadow.used.fp)
8699 || (sum.defd.mem & shadow.used.mem))
8700 {
8701 /* (a) would be violated (also takes care of (b)) */
8702 gcc_assert (get_attr_trap (i) != TRAP_YES
8703 || (!(sum.defd.i & sum.used.i)
8704 && !(sum.defd.fp & sum.used.fp)));
8705
8706 goto close_shadow;
8707 }
8708 break;
8709
8710 case JUMP_INSN:
8711 case CALL_INSN:
8712 case CODE_LABEL:
8713 goto close_shadow;
8714
8715 default:
8716 gcc_unreachable ();
8717 }
8718 }
8719 else
8720 {
8721 close_shadow:
8722 n = emit_insn_before (gen_trapb (), i);
8723 PUT_MODE (n, TImode);
8724 PUT_MODE (i, TImode);
8725 trap_pending = 0;
8726 shadow.used.i = 0;
8727 shadow.used.fp = 0;
8728 shadow.used.mem = 0;
8729 shadow.defd = shadow.used;
8730 }
8731 }
8732 }
8733
8734 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8735 && NONJUMP_INSN_P (i)
8736 && GET_CODE (PATTERN (i)) != USE
8737 && GET_CODE (PATTERN (i)) != CLOBBER
8738 && get_attr_trap (i) == TRAP_YES)
8739 {
8740 if (optimize && !trap_pending)
8741 summarize_insn (PATTERN (i), &shadow, 0);
8742 trap_pending = 1;
8743 }
8744 }
8745 }
8746 \f
8747 /* Alpha can only issue instruction groups simultaneously if they are
8748 suitably aligned. This is very processor-specific. */
8749 /* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8750 that are marked "fake". These instructions do not exist on that target,
8751 but it is possible to see these insns with deranged combinations of
8752 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting,
8753 choose a result at random. */
8754
8755 enum alphaev4_pipe {
8756 EV4_STOP = 0,
8757 EV4_IB0 = 1,
8758 EV4_IB1 = 2,
8759 EV4_IBX = 4
8760 };
8761
8762 enum alphaev5_pipe {
8763 EV5_STOP = 0,
8764 EV5_NONE = 1,
8765 EV5_E01 = 2,
8766 EV5_E0 = 4,
8767 EV5_E1 = 8,
8768 EV5_FAM = 16,
8769 EV5_FA = 32,
8770 EV5_FM = 64
8771 };
8772
8773 static enum alphaev4_pipe
8774 alphaev4_insn_pipe (rtx insn)
8775 {
8776 if (recog_memoized (insn) < 0)
8777 return EV4_STOP;
8778 if (get_attr_length (insn) != 4)
8779 return EV4_STOP;
8780
8781 switch (get_attr_type (insn))
8782 {
8783 case TYPE_ILD:
8784 case TYPE_LDSYM:
8785 case TYPE_FLD:
8786 case TYPE_LD_L:
8787 return EV4_IBX;
8788
8789 case TYPE_IADD:
8790 case TYPE_ILOG:
8791 case TYPE_ICMOV:
8792 case TYPE_ICMP:
8793 case TYPE_FST:
8794 case TYPE_SHIFT:
8795 case TYPE_IMUL:
8796 case TYPE_FBR:
8797 case TYPE_MVI: /* fake */
8798 return EV4_IB0;
8799
8800 case TYPE_IST:
8801 case TYPE_MISC:
8802 case TYPE_IBR:
8803 case TYPE_JSR:
8804 case TYPE_CALLPAL:
8805 case TYPE_FCPYS:
8806 case TYPE_FCMOV:
8807 case TYPE_FADD:
8808 case TYPE_FDIV:
8809 case TYPE_FMUL:
8810 case TYPE_ST_C:
8811 case TYPE_MB:
8812 case TYPE_FSQRT: /* fake */
8813 case TYPE_FTOI: /* fake */
8814 case TYPE_ITOF: /* fake */
8815 return EV4_IB1;
8816
8817 default:
8818 gcc_unreachable ();
8819 }
8820 }
8821
8822 static enum alphaev5_pipe
8823 alphaev5_insn_pipe (rtx insn)
8824 {
8825 if (recog_memoized (insn) < 0)
8826 return EV5_STOP;
8827 if (get_attr_length (insn) != 4)
8828 return EV5_STOP;
8829
8830 switch (get_attr_type (insn))
8831 {
8832 case TYPE_ILD:
8833 case TYPE_FLD:
8834 case TYPE_LDSYM:
8835 case TYPE_IADD:
8836 case TYPE_ILOG:
8837 case TYPE_ICMOV:
8838 case TYPE_ICMP:
8839 return EV5_E01;
8840
8841 case TYPE_IST:
8842 case TYPE_FST:
8843 case TYPE_SHIFT:
8844 case TYPE_IMUL:
8845 case TYPE_MISC:
8846 case TYPE_MVI:
8847 case TYPE_LD_L:
8848 case TYPE_ST_C:
8849 case TYPE_MB:
8850 case TYPE_FTOI: /* fake */
8851 case TYPE_ITOF: /* fake */
8852 return EV5_E0;
8853
8854 case TYPE_IBR:
8855 case TYPE_JSR:
8856 case TYPE_CALLPAL:
8857 return EV5_E1;
8858
8859 case TYPE_FCPYS:
8860 return EV5_FAM;
8861
8862 case TYPE_FBR:
8863 case TYPE_FCMOV:
8864 case TYPE_FADD:
8865 case TYPE_FDIV:
8866 case TYPE_FSQRT: /* fake */
8867 return EV5_FA;
8868
8869 case TYPE_FMUL:
8870 return EV5_FM;
8871
8872 default:
8873 gcc_unreachable ();
8874 }
8875 }
8876
8877 /* IN_USE is a mask of the slots currently filled within the insn group.
8878 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
8879 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
8880
8881 LEN is, of course, the length of the group in bytes. */
8882
8883 static rtx
8884 alphaev4_next_group (rtx insn, int *pin_use, int *plen)
8885 {
8886 int len, in_use;
8887
8888 len = in_use = 0;
8889
8890 if (! INSN_P (insn)
8891 || GET_CODE (PATTERN (insn)) == CLOBBER
8892 || GET_CODE (PATTERN (insn)) == USE)
8893 goto next_and_done;
8894
8895 while (1)
8896 {
8897 enum alphaev4_pipe pipe;
8898
8899 pipe = alphaev4_insn_pipe (insn);
8900 switch (pipe)
8901 {
8902 case EV4_STOP:
8903 /* Force complex instructions to start new groups. */
8904 if (in_use)
8905 goto done;
8906
8907 /* If this is a completely unrecognized insn, it's an asm.
8908 We don't know how long it is, so record length as -1 to
8909 signal a needed realignment. */
8910 if (recog_memoized (insn) < 0)
8911 len = -1;
8912 else
8913 len = get_attr_length (insn);
8914 goto next_and_done;
8915
8916 case EV4_IBX:
8917 if (in_use & EV4_IB0)
8918 {
8919 if (in_use & EV4_IB1)
8920 goto done;
8921 in_use |= EV4_IB1;
8922 }
8923 else
8924 in_use |= EV4_IB0 | EV4_IBX;
8925 break;
8926
8927 case EV4_IB0:
8928 if (in_use & EV4_IB0)
8929 {
8930 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
8931 goto done;
8932 in_use |= EV4_IB1;
8933 }
8934 in_use |= EV4_IB0;
8935 break;
8936
8937 case EV4_IB1:
8938 if (in_use & EV4_IB1)
8939 goto done;
8940 in_use |= EV4_IB1;
8941 break;
8942
8943 default:
8944 gcc_unreachable ();
8945 }
8946 len += 4;
8947
8948 /* Haifa doesn't do well scheduling branches. */
8949 if (JUMP_P (insn))
8950 goto next_and_done;
8951
8952 next:
8953 insn = next_nonnote_insn (insn);
8954
8955 if (!insn || ! INSN_P (insn))
8956 goto done;
8957
8958 /* Let Haifa tell us where it thinks insn group boundaries are. */
8959 if (GET_MODE (insn) == TImode)
8960 goto done;
8961
8962 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
8963 goto next;
8964 }
8965
8966 next_and_done:
8967 insn = next_nonnote_insn (insn);
8968
8969 done:
8970 *plen = len;
8971 *pin_use = in_use;
8972 return insn;
8973 }
8974
8975 /* IN_USE is a mask of the slots currently filled within the insn group.
8976 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
8977 the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
8978
8979 LEN is, of course, the length of the group in bytes. */
8980
8981 static rtx
8982 alphaev5_next_group (rtx insn, int *pin_use, int *plen)
8983 {
8984 int len, in_use;
8985
8986 len = in_use = 0;
8987
8988 if (! INSN_P (insn)
8989 || GET_CODE (PATTERN (insn)) == CLOBBER
8990 || GET_CODE (PATTERN (insn)) == USE)
8991 goto next_and_done;
8992
8993 while (1)
8994 {
8995 enum alphaev5_pipe pipe;
8996
8997 pipe = alphaev5_insn_pipe (insn);
8998 switch (pipe)
8999 {
9000 case EV5_STOP:
9001 /* Force complex instructions to start new groups. */
9002 if (in_use)
9003 goto done;
9004
9005 /* If this is a completely unrecognized insn, it's an asm.
9006 We don't know how long it is, so record length as -1 to
9007 signal a needed realignment. */
9008 if (recog_memoized (insn) < 0)
9009 len = -1;
9010 else
9011 len = get_attr_length (insn);
9012 goto next_and_done;
9013
9014 /* ??? Most of the places below, we would like to assert never
9015 happen, as it would indicate an error either in Haifa, or
9016 in the scheduling description. Unfortunately, Haifa never
9017 schedules the last instruction of the BB, so we don't have
9018 an accurate TI bit to go off. */
9019 case EV5_E01:
9020 if (in_use & EV5_E0)
9021 {
9022 if (in_use & EV5_E1)
9023 goto done;
9024 in_use |= EV5_E1;
9025 }
9026 else
9027 in_use |= EV5_E0 | EV5_E01;
9028 break;
9029
9030 case EV5_E0:
9031 if (in_use & EV5_E0)
9032 {
9033 if (!(in_use & EV5_E01) || (in_use & EV5_E1))
9034 goto done;
9035 in_use |= EV5_E1;
9036 }
9037 in_use |= EV5_E0;
9038 break;
9039
9040 case EV5_E1:
9041 if (in_use & EV5_E1)
9042 goto done;
9043 in_use |= EV5_E1;
9044 break;
9045
9046 case EV5_FAM:
9047 if (in_use & EV5_FA)
9048 {
9049 if (in_use & EV5_FM)
9050 goto done;
9051 in_use |= EV5_FM;
9052 }
9053 else
9054 in_use |= EV5_FA | EV5_FAM;
9055 break;
9056
9057 case EV5_FA:
9058 if (in_use & EV5_FA)
9059 goto done;
9060 in_use |= EV5_FA;
9061 break;
9062
9063 case EV5_FM:
9064 if (in_use & EV5_FM)
9065 goto done;
9066 in_use |= EV5_FM;
9067 break;
9068
9069 case EV5_NONE:
9070 break;
9071
9072 default:
9073 gcc_unreachable ();
9074 }
9075 len += 4;
9076
9077 /* Haifa doesn't do well scheduling branches. */
9078 /* ??? If this is predicted not-taken, slotting continues, except
9079 that no more IBR, FBR, or JSR insns may be slotted. */
9080 if (JUMP_P (insn))
9081 goto next_and_done;
9082
9083 next:
9084 insn = next_nonnote_insn (insn);
9085
9086 if (!insn || ! INSN_P (insn))
9087 goto done;
9088
9089 /* Let Haifa tell us where it thinks insn group boundaries are. */
9090 if (GET_MODE (insn) == TImode)
9091 goto done;
9092
9093 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9094 goto next;
9095 }
9096
9097 next_and_done:
9098 insn = next_nonnote_insn (insn);
9099
9100 done:
9101 *plen = len;
9102 *pin_use = in_use;
9103 return insn;
9104 }
9105
9106 static rtx
9107 alphaev4_next_nop (int *pin_use)
9108 {
9109 int in_use = *pin_use;
9110 rtx nop;
9111
9112 if (!(in_use & EV4_IB0))
9113 {
9114 in_use |= EV4_IB0;
9115 nop = gen_nop ();
9116 }
9117 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9118 {
9119 in_use |= EV4_IB1;
9120 nop = gen_nop ();
9121 }
9122 else if (TARGET_FP && !(in_use & EV4_IB1))
9123 {
9124 in_use |= EV4_IB1;
9125 nop = gen_fnop ();
9126 }
9127 else
9128 nop = gen_unop ();
9129
9130 *pin_use = in_use;
9131 return nop;
9132 }
9133
9134 static rtx
9135 alphaev5_next_nop (int *pin_use)
9136 {
9137 int in_use = *pin_use;
9138 rtx nop;
9139
9140 if (!(in_use & EV5_E1))
9141 {
9142 in_use |= EV5_E1;
9143 nop = gen_nop ();
9144 }
9145 else if (TARGET_FP && !(in_use & EV5_FA))
9146 {
9147 in_use |= EV5_FA;
9148 nop = gen_fnop ();
9149 }
9150 else if (TARGET_FP && !(in_use & EV5_FM))
9151 {
9152 in_use |= EV5_FM;
9153 nop = gen_fnop ();
9154 }
9155 else
9156 nop = gen_unop ();
9157
9158 *pin_use = in_use;
9159 return nop;
9160 }
9161
9162 /* The instruction group alignment main loop. */
9163
9164 static void
9165 alpha_align_insns (unsigned int max_align,
9166 rtx (*next_group) (rtx, int *, int *),
9167 rtx (*next_nop) (int *))
9168 {
9169 /* ALIGN is the known alignment for the insn group. */
9170 unsigned int align;
9171 /* OFS is the offset of the current insn in the insn group. */
9172 int ofs;
9173 int prev_in_use, in_use, len, ldgp;
9174 rtx i, next;
9175
9176 /* Let shorten branches care for assigning alignments to code labels. */
9177 shorten_branches (get_insns ());
9178
9179 if (align_functions < 4)
9180 align = 4;
9181 else if ((unsigned int) align_functions < max_align)
9182 align = align_functions;
9183 else
9184 align = max_align;
9185
9186 ofs = prev_in_use = 0;
9187 i = get_insns ();
9188 if (NOTE_P (i))
9189 i = next_nonnote_insn (i);
9190
9191 ldgp = alpha_function_needs_gp ? 8 : 0;
9192
9193 while (i)
9194 {
9195 next = (*next_group) (i, &in_use, &len);
9196
9197 /* When we see a label, resync alignment etc. */
9198 if (LABEL_P (i))
9199 {
9200 unsigned int new_align = 1 << label_to_alignment (i);
9201
9202 if (new_align >= align)
9203 {
9204 align = new_align < max_align ? new_align : max_align;
9205 ofs = 0;
9206 }
9207
9208 else if (ofs & (new_align-1))
9209 ofs = (ofs | (new_align-1)) + 1;
9210 gcc_assert (!len);
9211 }
9212
9213 /* Handle complex instructions special. */
9214 else if (in_use == 0)
9215 {
9216 /* Asms will have length < 0. This is a signal that we have
9217 lost alignment knowledge. Assume, however, that the asm
9218 will not mis-align instructions. */
9219 if (len < 0)
9220 {
9221 ofs = 0;
9222 align = 4;
9223 len = 0;
9224 }
9225 }
9226
9227 /* If the known alignment is smaller than the recognized insn group,
9228 realign the output. */
9229 else if ((int) align < len)
9230 {
9231 unsigned int new_log_align = len > 8 ? 4 : 3;
9232 rtx prev, where;
9233
9234 where = prev = prev_nonnote_insn (i);
9235 if (!where || !LABEL_P (where))
9236 where = i;
9237
9238 /* Can't realign between a call and its gp reload. */
9239 if (! (TARGET_EXPLICIT_RELOCS
9240 && prev && CALL_P (prev)))
9241 {
9242 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9243 align = 1 << new_log_align;
9244 ofs = 0;
9245 }
9246 }
9247
9248 /* We may not insert padding inside the initial ldgp sequence. */
9249 else if (ldgp > 0)
9250 ldgp -= len;
9251
9252 /* If the group won't fit in the same INT16 as the previous,
9253 we need to add padding to keep the group together. Rather
9254 than simply leaving the insn filling to the assembler, we
9255 can make use of the knowledge of what sorts of instructions
9256 were issued in the previous group to make sure that all of
9257 the added nops are really free. */
9258 else if (ofs + len > (int) align)
9259 {
9260 int nop_count = (align - ofs) / 4;
9261 rtx where;
9262
9263 /* Insert nops before labels, branches, and calls to truly merge
9264 the execution of the nops with the previous instruction group. */
9265 where = prev_nonnote_insn (i);
9266 if (where)
9267 {
9268 if (LABEL_P (where))
9269 {
9270 rtx where2 = prev_nonnote_insn (where);
9271 if (where2 && JUMP_P (where2))
9272 where = where2;
9273 }
9274 else if (NONJUMP_INSN_P (where))
9275 where = i;
9276 }
9277 else
9278 where = i;
9279
9280 do
9281 emit_insn_before ((*next_nop)(&prev_in_use), where);
9282 while (--nop_count);
9283 ofs = 0;
9284 }
9285
9286 ofs = (ofs + len) & (align - 1);
9287 prev_in_use = in_use;
9288 i = next;
9289 }
9290 }
9291
9292 /* Insert an unop between a noreturn function call and GP load. */
9293
9294 static void
9295 alpha_pad_noreturn (void)
9296 {
9297 rtx insn, next;
9298
9299 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9300 {
9301 if (! (CALL_P (insn)
9302 && find_reg_note (insn, REG_NORETURN, NULL_RTX)))
9303 continue;
9304
9305 /* Make sure we do not split a call and its corresponding
9306 CALL_ARG_LOCATION note. */
9307 if (CALL_P (insn))
9308 {
9309 next = NEXT_INSN (insn);
9310 if (next && NOTE_P (next)
9311 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
9312 insn = next;
9313 }
9314
9315 next = next_active_insn (insn);
9316
9317 if (next)
9318 {
9319 rtx pat = PATTERN (next);
9320
9321 if (GET_CODE (pat) == SET
9322 && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
9323 && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
9324 emit_insn_after (gen_unop (), insn);
9325 }
9326 }
9327 }
9328 \f
9329 /* Machine dependent reorg pass. */
9330
9331 static void
9332 alpha_reorg (void)
9333 {
9334 /* Workaround for a linker error that triggers when an
9335 exception handler immediatelly follows a noreturn function.
9336
9337 The instruction stream from an object file:
9338
9339 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58>
9340 58: 00 00 ba 27 ldah gp,0(ra)
9341 5c: 00 00 bd 23 lda gp,0(gp)
9342 60: 00 00 7d a7 ldq t12,0(gp)
9343 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68>
9344
9345 was converted in the final link pass to:
9346
9347 fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8>
9348 fdb28: 00 00 fe 2f unop
9349 fdb2c: 00 00 fe 2f unop
9350 fdb30: 30 82 7d a7 ldq t12,-32208(gp)
9351 fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68>
9352
9353 GP load instructions were wrongly cleared by the linker relaxation
9354 pass. This workaround prevents removal of GP loads by inserting
9355 an unop instruction between a noreturn function call and
9356 exception handler prologue. */
9357
9358 if (current_function_has_exception_handlers ())
9359 alpha_pad_noreturn ();
9360
9361 if (alpha_tp != ALPHA_TP_PROG || flag_exceptions)
9362 alpha_handle_trap_shadows ();
9363
9364 /* Due to the number of extra trapb insns, don't bother fixing up
9365 alignment when trap precision is instruction. Moreover, we can
9366 only do our job when sched2 is run. */
9367 if (optimize && !optimize_size
9368 && alpha_tp != ALPHA_TP_INSN
9369 && flag_schedule_insns_after_reload)
9370 {
9371 if (alpha_tune == PROCESSOR_EV4)
9372 alpha_align_insns (8, alphaev4_next_group, alphaev4_next_nop);
9373 else if (alpha_tune == PROCESSOR_EV5)
9374 alpha_align_insns (16, alphaev5_next_group, alphaev5_next_nop);
9375 }
9376 }
9377 \f
9378 #ifdef HAVE_STAMP_H
9379 #include <stamp.h>
9380 #endif
9381
9382 static void
9383 alpha_file_start (void)
9384 {
9385 #ifdef OBJECT_FORMAT_ELF
9386 /* If emitting dwarf2 debug information, we cannot generate a .file
9387 directive to start the file, as it will conflict with dwarf2out
9388 file numbers. So it's only useful when emitting mdebug output. */
9389 targetm.asm_file_start_file_directive = (write_symbols == DBX_DEBUG);
9390 #endif
9391
9392 default_file_start ();
9393 #ifdef MS_STAMP
9394 fprintf (asm_out_file, "\t.verstamp %d %d\n", MS_STAMP, LS_STAMP);
9395 #endif
9396
9397 fputs ("\t.set noreorder\n", asm_out_file);
9398 fputs ("\t.set volatile\n", asm_out_file);
9399 if (TARGET_ABI_OSF)
9400 fputs ("\t.set noat\n", asm_out_file);
9401 if (TARGET_EXPLICIT_RELOCS)
9402 fputs ("\t.set nomacro\n", asm_out_file);
9403 if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9404 {
9405 const char *arch;
9406
9407 if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9408 arch = "ev6";
9409 else if (TARGET_MAX)
9410 arch = "pca56";
9411 else if (TARGET_BWX)
9412 arch = "ev56";
9413 else if (alpha_cpu == PROCESSOR_EV5)
9414 arch = "ev5";
9415 else
9416 arch = "ev4";
9417
9418 fprintf (asm_out_file, "\t.arch %s\n", arch);
9419 }
9420 }
9421
9422 #ifdef OBJECT_FORMAT_ELF
9423 /* Since we don't have a .dynbss section, we should not allow global
9424 relocations in the .rodata section. */
9425
9426 static int
9427 alpha_elf_reloc_rw_mask (void)
9428 {
9429 return flag_pic ? 3 : 2;
9430 }
9431
9432 /* Return a section for X. The only special thing we do here is to
9433 honor small data. */
9434
9435 static section *
9436 alpha_elf_select_rtx_section (enum machine_mode mode, rtx x,
9437 unsigned HOST_WIDE_INT align)
9438 {
9439 if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9440 /* ??? Consider using mergeable sdata sections. */
9441 return sdata_section;
9442 else
9443 return default_elf_select_rtx_section (mode, x, align);
9444 }
9445
9446 static unsigned int
9447 alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
9448 {
9449 unsigned int flags = 0;
9450
9451 if (strcmp (name, ".sdata") == 0
9452 || strncmp (name, ".sdata.", 7) == 0
9453 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9454 || strcmp (name, ".sbss") == 0
9455 || strncmp (name, ".sbss.", 6) == 0
9456 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9457 flags = SECTION_SMALL;
9458
9459 flags |= default_section_type_flags (decl, name, reloc);
9460 return flags;
9461 }
9462 #endif /* OBJECT_FORMAT_ELF */
9463 \f
9464 /* Structure to collect function names for final output in link section. */
9465 /* Note that items marked with GTY can't be ifdef'ed out. */
9466
9467 enum reloc_kind
9468 {
9469 KIND_LINKAGE,
9470 KIND_CODEADDR
9471 };
9472
9473 struct GTY(()) alpha_links
9474 {
9475 rtx func;
9476 rtx linkage;
9477 enum reloc_kind rkind;
9478 };
9479
9480 #if TARGET_ABI_OPEN_VMS
9481
9482 /* Return the VMS argument type corresponding to MODE. */
9483
9484 enum avms_arg_type
9485 alpha_arg_type (enum machine_mode mode)
9486 {
9487 switch (mode)
9488 {
9489 case SFmode:
9490 return TARGET_FLOAT_VAX ? FF : FS;
9491 case DFmode:
9492 return TARGET_FLOAT_VAX ? FD : FT;
9493 default:
9494 return I64;
9495 }
9496 }
9497
9498 /* Return an rtx for an integer representing the VMS Argument Information
9499 register value. */
9500
9501 rtx
9502 alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9503 {
9504 unsigned HOST_WIDE_INT regval = cum.num_args;
9505 int i;
9506
9507 for (i = 0; i < 6; i++)
9508 regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9509
9510 return GEN_INT (regval);
9511 }
9512 \f
9513
9514 /* Return a SYMBOL_REF representing the reference to the .linkage entry
9515 of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if
9516 this is the reference to the linkage pointer value, 0 if this is the
9517 reference to the function entry value. RFLAG is 1 if this a reduced
9518 reference (code address only), 0 if this is a full reference. */
9519
9520 rtx
9521 alpha_use_linkage (rtx func, bool lflag, bool rflag)
9522 {
9523 struct alpha_links *al = NULL;
9524 const char *name = XSTR (func, 0);
9525
9526 if (cfun->machine->links)
9527 {
9528 splay_tree_node lnode;
9529
9530 /* Is this name already defined? */
9531 lnode = splay_tree_lookup (cfun->machine->links, (splay_tree_key) name);
9532 if (lnode)
9533 al = (struct alpha_links *) lnode->value;
9534 }
9535 else
9536 cfun->machine->links = splay_tree_new_ggc
9537 ((splay_tree_compare_fn) strcmp,
9538 ggc_alloc_splay_tree_str_alpha_links_splay_tree_s,
9539 ggc_alloc_splay_tree_str_alpha_links_splay_tree_node_s);
9540
9541 if (al == NULL)
9542 {
9543 size_t buf_len;
9544 char *linksym;
9545 tree id;
9546
9547 if (name[0] == '*')
9548 name++;
9549
9550 /* Follow transparent alias, as this is used for CRTL translations. */
9551 id = maybe_get_identifier (name);
9552 if (id)
9553 {
9554 while (IDENTIFIER_TRANSPARENT_ALIAS (id))
9555 id = TREE_CHAIN (id);
9556 name = IDENTIFIER_POINTER (id);
9557 }
9558
9559 buf_len = strlen (name) + 8 + 9;
9560 linksym = (char *) alloca (buf_len);
9561 snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name);
9562
9563 al = ggc_alloc_alpha_links ();
9564 al->func = func;
9565 al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym));
9566
9567 splay_tree_insert (cfun->machine->links,
9568 (splay_tree_key) ggc_strdup (name),
9569 (splay_tree_value) al);
9570 }
9571
9572 al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE;
9573
9574 if (lflag)
9575 return gen_rtx_MEM (Pmode, plus_constant (al->linkage, 8));
9576 else
9577 return al->linkage;
9578 }
9579
9580 static int
9581 alpha_write_one_linkage (splay_tree_node node, void *data)
9582 {
9583 const char *const name = (const char *) node->key;
9584 struct alpha_links *link = (struct alpha_links *) node->value;
9585 FILE *stream = (FILE *) data;
9586
9587 ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0));
9588 if (link->rkind == KIND_CODEADDR)
9589 {
9590 /* External and used, request code address. */
9591 fprintf (stream, "\t.code_address ");
9592 }
9593 else
9594 {
9595 if (!SYMBOL_REF_EXTERNAL_P (link->func)
9596 && SYMBOL_REF_LOCAL_P (link->func))
9597 {
9598 /* Locally defined, build linkage pair. */
9599 fprintf (stream, "\t.quad %s..en\n", name);
9600 fprintf (stream, "\t.quad ");
9601 }
9602 else
9603 {
9604 /* External, request linkage pair. */
9605 fprintf (stream, "\t.linkage ");
9606 }
9607 }
9608 assemble_name (stream, name);
9609 fputs ("\n", stream);
9610
9611 return 0;
9612 }
9613
9614 static void
9615 alpha_write_linkage (FILE *stream, const char *funname)
9616 {
9617 fprintf (stream, "\t.link\n");
9618 fprintf (stream, "\t.align 3\n");
9619 in_section = NULL;
9620
9621 #ifdef TARGET_VMS_CRASH_DEBUG
9622 fputs ("\t.name ", stream);
9623 assemble_name (stream, funname);
9624 fputs ("..na\n", stream);
9625 #endif
9626
9627 ASM_OUTPUT_LABEL (stream, funname);
9628 fprintf (stream, "\t.pdesc ");
9629 assemble_name (stream, funname);
9630 fprintf (stream, "..en,%s\n",
9631 alpha_procedure_type == PT_STACK ? "stack"
9632 : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9633
9634 if (cfun->machine->links)
9635 {
9636 splay_tree_foreach (cfun->machine->links, alpha_write_one_linkage, stream);
9637 /* splay_tree_delete (func->links); */
9638 }
9639 }
9640
9641 /* Switch to an arbitrary section NAME with attributes as specified
9642 by FLAGS. ALIGN specifies any known alignment requirements for
9643 the section; 0 if the default should be used. */
9644
9645 static void
9646 vms_asm_named_section (const char *name, unsigned int flags,
9647 tree decl ATTRIBUTE_UNUSED)
9648 {
9649 fputc ('\n', asm_out_file);
9650 fprintf (asm_out_file, ".section\t%s", name);
9651
9652 if (flags & SECTION_DEBUG)
9653 fprintf (asm_out_file, ",NOWRT");
9654
9655 fputc ('\n', asm_out_file);
9656 }
9657
9658 /* Record an element in the table of global constructors. SYMBOL is
9659 a SYMBOL_REF of the function to be called; PRIORITY is a number
9660 between 0 and MAX_INIT_PRIORITY.
9661
9662 Differs from default_ctors_section_asm_out_constructor in that the
9663 width of the .ctors entry is always 64 bits, rather than the 32 bits
9664 used by a normal pointer. */
9665
9666 static void
9667 vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9668 {
9669 switch_to_section (ctors_section);
9670 assemble_align (BITS_PER_WORD);
9671 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9672 }
9673
9674 static void
9675 vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9676 {
9677 switch_to_section (dtors_section);
9678 assemble_align (BITS_PER_WORD);
9679 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9680 }
9681 #else
9682 rtx
9683 alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
9684 bool lflag ATTRIBUTE_UNUSED,
9685 bool rflag ATTRIBUTE_UNUSED)
9686 {
9687 return NULL_RTX;
9688 }
9689
9690 #endif /* TARGET_ABI_OPEN_VMS */
9691 \f
9692 static void
9693 alpha_init_libfuncs (void)
9694 {
9695 if (TARGET_ABI_OPEN_VMS)
9696 {
9697 /* Use the VMS runtime library functions for division and
9698 remainder. */
9699 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9700 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9701 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9702 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9703 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9704 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9705 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9706 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9707 abort_libfunc = init_one_libfunc ("decc$abort");
9708 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
9709 #ifdef MEM_LIBFUNCS_INIT
9710 MEM_LIBFUNCS_INIT;
9711 #endif
9712 }
9713 }
9714
9715 /* On the Alpha, we use this to disable the floating-point registers
9716 when they don't exist. */
9717
9718 static void
9719 alpha_conditional_register_usage (void)
9720 {
9721 int i;
9722 if (! TARGET_FPREGS)
9723 for (i = 32; i < 63; i++)
9724 fixed_regs[i] = call_used_regs[i] = 1;
9725 }
9726 \f
9727 /* Initialize the GCC target structure. */
9728 #if TARGET_ABI_OPEN_VMS
9729 # undef TARGET_ATTRIBUTE_TABLE
9730 # define TARGET_ATTRIBUTE_TABLE vms_attribute_table
9731 # undef TARGET_CAN_ELIMINATE
9732 # define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
9733 #endif
9734
9735 #undef TARGET_IN_SMALL_DATA_P
9736 #define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
9737
9738 #undef TARGET_ASM_ALIGNED_HI_OP
9739 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
9740 #undef TARGET_ASM_ALIGNED_DI_OP
9741 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
9742
9743 /* Default unaligned ops are provided for ELF systems. To get unaligned
9744 data for non-ELF systems, we have to turn off auto alignment. */
9745 #if !defined (OBJECT_FORMAT_ELF) || TARGET_ABI_OPEN_VMS
9746 #undef TARGET_ASM_UNALIGNED_HI_OP
9747 #define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
9748 #undef TARGET_ASM_UNALIGNED_SI_OP
9749 #define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
9750 #undef TARGET_ASM_UNALIGNED_DI_OP
9751 #define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
9752 #endif
9753
9754 #ifdef OBJECT_FORMAT_ELF
9755 #undef TARGET_ASM_RELOC_RW_MASK
9756 #define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask
9757 #undef TARGET_ASM_SELECT_RTX_SECTION
9758 #define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section
9759 #undef TARGET_SECTION_TYPE_FLAGS
9760 #define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags
9761 #endif
9762
9763 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
9764 #define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
9765
9766 #undef TARGET_INIT_LIBFUNCS
9767 #define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
9768
9769 #undef TARGET_LEGITIMIZE_ADDRESS
9770 #define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
9771
9772 #undef TARGET_ASM_FILE_START
9773 #define TARGET_ASM_FILE_START alpha_file_start
9774 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
9775 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
9776
9777 #undef TARGET_SCHED_ADJUST_COST
9778 #define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
9779 #undef TARGET_SCHED_ISSUE_RATE
9780 #define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
9781 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
9782 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
9783 alpha_multipass_dfa_lookahead
9784
9785 #undef TARGET_HAVE_TLS
9786 #define TARGET_HAVE_TLS HAVE_AS_TLS
9787
9788 #undef TARGET_BUILTIN_DECL
9789 #define TARGET_BUILTIN_DECL alpha_builtin_decl
9790 #undef TARGET_INIT_BUILTINS
9791 #define TARGET_INIT_BUILTINS alpha_init_builtins
9792 #undef TARGET_EXPAND_BUILTIN
9793 #define TARGET_EXPAND_BUILTIN alpha_expand_builtin
9794 #undef TARGET_FOLD_BUILTIN
9795 #define TARGET_FOLD_BUILTIN alpha_fold_builtin
9796
9797 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9798 #define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
9799 #undef TARGET_CANNOT_COPY_INSN_P
9800 #define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
9801 #undef TARGET_LEGITIMATE_CONSTANT_P
9802 #define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
9803 #undef TARGET_CANNOT_FORCE_CONST_MEM
9804 #define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
9805
9806 #if TARGET_ABI_OSF
9807 #undef TARGET_ASM_OUTPUT_MI_THUNK
9808 #define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
9809 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9810 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
9811 #undef TARGET_STDARG_OPTIMIZE_HOOK
9812 #define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
9813 #endif
9814
9815 /* Use 16-bits anchor. */
9816 #undef TARGET_MIN_ANCHOR_OFFSET
9817 #define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1
9818 #undef TARGET_MAX_ANCHOR_OFFSET
9819 #define TARGET_MAX_ANCHOR_OFFSET 0x7fff
9820 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9821 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
9822
9823 #undef TARGET_RTX_COSTS
9824 #define TARGET_RTX_COSTS alpha_rtx_costs
9825 #undef TARGET_ADDRESS_COST
9826 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
9827
9828 #undef TARGET_MACHINE_DEPENDENT_REORG
9829 #define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
9830
9831 #undef TARGET_PROMOTE_FUNCTION_MODE
9832 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
9833 #undef TARGET_PROMOTE_PROTOTYPES
9834 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
9835 #undef TARGET_RETURN_IN_MEMORY
9836 #define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
9837 #undef TARGET_PASS_BY_REFERENCE
9838 #define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
9839 #undef TARGET_SETUP_INCOMING_VARARGS
9840 #define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
9841 #undef TARGET_STRICT_ARGUMENT_NAMING
9842 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
9843 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
9844 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
9845 #undef TARGET_SPLIT_COMPLEX_ARG
9846 #define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
9847 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9848 #define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
9849 #undef TARGET_ARG_PARTIAL_BYTES
9850 #define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
9851 #undef TARGET_FUNCTION_ARG
9852 #define TARGET_FUNCTION_ARG alpha_function_arg
9853 #undef TARGET_FUNCTION_ARG_ADVANCE
9854 #define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
9855 #undef TARGET_TRAMPOLINE_INIT
9856 #define TARGET_TRAMPOLINE_INIT alpha_trampoline_init
9857
9858 #undef TARGET_INSTANTIATE_DECLS
9859 #define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls
9860
9861 #undef TARGET_SECONDARY_RELOAD
9862 #define TARGET_SECONDARY_RELOAD alpha_secondary_reload
9863
9864 #undef TARGET_SCALAR_MODE_SUPPORTED_P
9865 #define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
9866 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9867 #define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
9868
9869 #undef TARGET_BUILD_BUILTIN_VA_LIST
9870 #define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
9871
9872 #undef TARGET_EXPAND_BUILTIN_VA_START
9873 #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
9874
9875 /* The Alpha architecture does not require sequential consistency. See
9876 http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html
9877 for an example of how it can be violated in practice. */
9878 #undef TARGET_RELAXED_ORDERING
9879 #define TARGET_RELAXED_ORDERING true
9880
9881 #undef TARGET_OPTION_OVERRIDE
9882 #define TARGET_OPTION_OVERRIDE alpha_option_override
9883
9884 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
9885 #undef TARGET_MANGLE_TYPE
9886 #define TARGET_MANGLE_TYPE alpha_mangle_type
9887 #endif
9888
9889 #undef TARGET_LEGITIMATE_ADDRESS_P
9890 #define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p
9891
9892 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9893 #define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage
9894
9895 struct gcc_target targetm = TARGET_INITIALIZER;
9896
9897 \f
9898 #include "gt-alpha.h"