1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992-2014 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "calls.h"
30 #include "varasm.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "recog.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "reload.h"
42 #include "obstack.h"
43 #include "except.h"
44 #include "function.h"
45 #include "diagnostic-core.h"
46 #include "ggc.h"
47 #include "tm_p.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "common/common-target.h"
51 #include "debug.h"
52 #include "langhooks.h"
53 #include "splay-tree.h"
54 #include "hash-table.h"
55 #include "vec.h"
56 #include "basic-block.h"
57 #include "tree-ssa-alias.h"
58 #include "internal-fn.h"
59 #include "gimple-fold.h"
60 #include "tree-eh.h"
61 #include "gimple-expr.h"
62 #include "is-a.h"
63 #include "gimple.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "pass_manager.h"
67 #include "gimple-iterator.h"
68 #include "gimplify.h"
69 #include "gimple-ssa.h"
70 #include "stringpool.h"
71 #include "tree-ssanames.h"
72 #include "tree-stdarg.h"
73 #include "tm-constrs.h"
74 #include "df.h"
75 #include "libfuncs.h"
76 #include "opts.h"
77 #include "params.h"
78 #include "builtins.h"
79
80 /* Specify which cpu to schedule for. */
81 enum processor_type alpha_tune;
82
83 /* Which cpu we're generating code for. */
84 enum processor_type alpha_cpu;
85
86 static const char * const alpha_cpu_name[] =
87 {
88 "ev4", "ev5", "ev6"
89 };
90
91 /* Specify how accurate floating-point traps need to be. */
92
93 enum alpha_trap_precision alpha_tp;
94
95 /* Specify the floating-point rounding mode. */
96
97 enum alpha_fp_rounding_mode alpha_fprm;
98
99 /* Specify which things cause traps. */
100
101 enum alpha_fp_trap_mode alpha_fptm;
102
103 /* Nonzero if inside a function, because the Alpha assembler can't
104 handle .file directives inside functions. */
105
106 static int inside_function = FALSE;
107
108 /* The number of cycles of latency we should assume on memory reads. */
109
110 int alpha_memory_latency = 3;
111
112 /* Whether the function needs the GP. */
113
114 static int alpha_function_needs_gp;
115
116 /* The assembler name of the current function. */
117
118 static const char *alpha_fnname;
119
120 /* The next explicit relocation sequence number. */
121 extern GTY(()) int alpha_next_sequence_number;
122 int alpha_next_sequence_number = 1;
123
124 /* The literal and gpdisp sequence numbers for this insn, as printed
125 by %# and %* respectively. */
126 extern GTY(()) int alpha_this_literal_sequence_number;
127 extern GTY(()) int alpha_this_gpdisp_sequence_number;
128 int alpha_this_literal_sequence_number;
129 int alpha_this_gpdisp_sequence_number;
130
131 /* Costs of various operations on the different architectures. */
132
133 struct alpha_rtx_cost_data
134 {
135 unsigned char fp_add;
136 unsigned char fp_mult;
137 unsigned char fp_div_sf;
138 unsigned char fp_div_df;
139 unsigned char int_mult_si;
140 unsigned char int_mult_di;
141 unsigned char int_shift;
142 unsigned char int_cmov;
143 unsigned short int_div;
144 };
145
146 static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
147 {
148 { /* EV4 */
149 COSTS_N_INSNS (6), /* fp_add */
150 COSTS_N_INSNS (6), /* fp_mult */
151 COSTS_N_INSNS (34), /* fp_div_sf */
152 COSTS_N_INSNS (63), /* fp_div_df */
153 COSTS_N_INSNS (23), /* int_mult_si */
154 COSTS_N_INSNS (23), /* int_mult_di */
155 COSTS_N_INSNS (2), /* int_shift */
156 COSTS_N_INSNS (2), /* int_cmov */
157 COSTS_N_INSNS (97), /* int_div */
158 },
159 { /* EV5 */
160 COSTS_N_INSNS (4), /* fp_add */
161 COSTS_N_INSNS (4), /* fp_mult */
162 COSTS_N_INSNS (15), /* fp_div_sf */
163 COSTS_N_INSNS (22), /* fp_div_df */
164 COSTS_N_INSNS (8), /* int_mult_si */
165 COSTS_N_INSNS (12), /* int_mult_di */
166 COSTS_N_INSNS (1) + 1, /* int_shift */
167 COSTS_N_INSNS (1), /* int_cmov */
168 COSTS_N_INSNS (83), /* int_div */
169 },
170 { /* EV6 */
171 COSTS_N_INSNS (4), /* fp_add */
172 COSTS_N_INSNS (4), /* fp_mult */
173 COSTS_N_INSNS (12), /* fp_div_sf */
174 COSTS_N_INSNS (15), /* fp_div_df */
175 COSTS_N_INSNS (7), /* int_mult_si */
176 COSTS_N_INSNS (7), /* int_mult_di */
177 COSTS_N_INSNS (1), /* int_shift */
178 COSTS_N_INSNS (2), /* int_cmov */
179 COSTS_N_INSNS (86), /* int_div */
180 },
181 };
182
183 /* Similar but tuned for code size instead of execution latency. The
184 extra +N is fractional cost tuning based on latency. It's used to
185 encourage use of cheaper insns like shift, but only if there's just
186 one of them. */
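/* As a rough illustration (assuming the usual COSTS_N_INSNS (N) == N * 4):
   int_mult_di below is 4 + 2 = 6 while int_shift is 4, so a single shift
   still beats a multiply, but two shifts (total 8) do not.  */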
187
188 static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
189 {
190 COSTS_N_INSNS (1), /* fp_add */
191 COSTS_N_INSNS (1), /* fp_mult */
192 COSTS_N_INSNS (1), /* fp_div_sf */
193 COSTS_N_INSNS (1) + 1, /* fp_div_df */
194 COSTS_N_INSNS (1) + 1, /* int_mult_si */
195 COSTS_N_INSNS (1) + 2, /* int_mult_di */
196 COSTS_N_INSNS (1), /* int_shift */
197 COSTS_N_INSNS (1), /* int_cmov */
198 COSTS_N_INSNS (6), /* int_div */
199 };
200
201 /* Get the number of args of a function in one of two ways. */
202 #if TARGET_ABI_OPEN_VMS
203 #define NUM_ARGS crtl->args.info.num_args
204 #else
205 #define NUM_ARGS crtl->args.info
206 #endif
207
208 #define REG_PV 27
209 #define REG_RA 26
210
211 /* Declarations of static functions. */
212 static struct machine_function *alpha_init_machine_status (void);
213 static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
214 static void alpha_handle_trap_shadows (void);
215 static void alpha_align_insns (void);
216
217 #if TARGET_ABI_OPEN_VMS
218 static void alpha_write_linkage (FILE *, const char *);
219 static bool vms_valid_pointer_mode (enum machine_mode);
220 #else
221 #define vms_patch_builtins() gcc_unreachable()
222 #endif
223 \f
224 static unsigned int
225 rest_of_handle_trap_shadows (void)
226 {
227 alpha_handle_trap_shadows ();
228 return 0;
229 }
230
231 namespace {
232
233 const pass_data pass_data_handle_trap_shadows =
234 {
235 RTL_PASS,
236 "trap_shadows", /* name */
237 OPTGROUP_NONE, /* optinfo_flags */
238 TV_NONE, /* tv_id */
239 0, /* properties_required */
240 0, /* properties_provided */
241 0, /* properties_destroyed */
242 0, /* todo_flags_start */
243 TODO_df_finish, /* todo_flags_finish */
244 };
245
246 class pass_handle_trap_shadows : public rtl_opt_pass
247 {
248 public:
249 pass_handle_trap_shadows(gcc::context *ctxt)
250 : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt)
251 {}
252
253 /* opt_pass methods: */
254 virtual bool gate (function *)
255 {
256 return alpha_tp != ALPHA_TP_PROG || flag_exceptions;
257 }
258
259 virtual unsigned int execute (function *)
260 {
261 return rest_of_handle_trap_shadows ();
262 }
263
264 }; // class pass_handle_trap_shadows
265
266 } // anon namespace
267
268 rtl_opt_pass *
269 make_pass_handle_trap_shadows (gcc::context *ctxt)
270 {
271 return new pass_handle_trap_shadows (ctxt);
272 }
273
274 static unsigned int
275 rest_of_align_insns (void)
276 {
277 alpha_align_insns ();
278 return 0;
279 }
280
281 namespace {
282
283 const pass_data pass_data_align_insns =
284 {
285 RTL_PASS,
286 "align_insns", /* name */
287 OPTGROUP_NONE, /* optinfo_flags */
288 TV_NONE, /* tv_id */
289 0, /* properties_required */
290 0, /* properties_provided */
291 0, /* properties_destroyed */
292 0, /* todo_flags_start */
293 TODO_df_finish, /* todo_flags_finish */
294 };
295
296 class pass_align_insns : public rtl_opt_pass
297 {
298 public:
299 pass_align_insns(gcc::context *ctxt)
300 : rtl_opt_pass(pass_data_align_insns, ctxt)
301 {}
302
303 /* opt_pass methods: */
304 virtual bool gate (function *)
305 {
306 /* Due to the number of extra trapb insns, don't bother fixing up
307 alignment when trap precision is instruction. Moreover, we can
308 only do our job when sched2 is run. */
309 return ((alpha_tune == PROCESSOR_EV4
310 || alpha_tune == PROCESSOR_EV5)
311 && optimize && !optimize_size
312 && alpha_tp != ALPHA_TP_INSN
313 && flag_schedule_insns_after_reload);
314 }
315
316 virtual unsigned int execute (function *)
317 {
318 return rest_of_align_insns ();
319 }
320
321 }; // class pass_align_insns
322
323 } // anon namespace
324
325 rtl_opt_pass *
326 make_pass_align_insns (gcc::context *ctxt)
327 {
328 return new pass_align_insns (ctxt);
329 }
330
331 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
332 /* Implement TARGET_MANGLE_TYPE. */
333
334 static const char *
335 alpha_mangle_type (const_tree type)
336 {
337 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
338 && TARGET_LONG_DOUBLE_128)
339 return "g";
340
341 /* For all other types, use normal C++ mangling. */
342 return NULL;
343 }
344 #endif
345
346 /* Parse target option strings. */
347
348 static void
349 alpha_option_override (void)
350 {
351 static const struct cpu_table {
352 const char *const name;
353 const enum processor_type processor;
354 const int flags;
355 const unsigned short line_size; /* in bytes */
356 const unsigned short l1_size; /* in kb. */
357 const unsigned short l2_size; /* in kb. */
358 } cpu_table[] = {
359 /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
360 EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45
361 had 64k to 8M 8-byte direct Bcache. */
362 { "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
363 { "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
364 { "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 },
365
366 /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
367 and 1M to 16M 64 byte L3 (not modeled).
368 PCA56 had a 16k 64-byte Icache; PCA57 had a 32k Icache.
369 PCA56 had an 8k 64-byte Dcache; PCA57 had a 16k Dcache. */
370 { "ev5", PROCESSOR_EV5, 0, 32, 8, 96 },
371 { "21164", PROCESSOR_EV5, 0, 32, 8, 96 },
372 { "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
373 { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
374 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
375 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
376 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
377
378 /* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */
379 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
380 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
381 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
382 64, 64, 16*1024 },
383 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
384 64, 64, 16*1024 }
385 };
386
387 opt_pass *pass_handle_trap_shadows = make_pass_handle_trap_shadows (g);
388 static struct register_pass_info handle_trap_shadows_info
389 = { pass_handle_trap_shadows, "eh_ranges",
390 1, PASS_POS_INSERT_AFTER
391 };
392
393 opt_pass *pass_align_insns = make_pass_align_insns (g);
394 static struct register_pass_info align_insns_info
395 = { pass_align_insns, "shorten",
396 1, PASS_POS_INSERT_BEFORE
397 };
398
399 int const ct_size = ARRAY_SIZE (cpu_table);
400 int line_size = 0, l1_size = 0, l2_size = 0;
401 int i;
402
403 #ifdef SUBTARGET_OVERRIDE_OPTIONS
404 SUBTARGET_OVERRIDE_OPTIONS;
405 #endif
406
407 /* Default to full IEEE compliance mode for Go language. */
408 if (strcmp (lang_hooks.name, "GNU Go") == 0
409 && !(target_flags_explicit & MASK_IEEE))
410 target_flags |= MASK_IEEE;
411
412 alpha_fprm = ALPHA_FPRM_NORM;
413 alpha_tp = ALPHA_TP_PROG;
414 alpha_fptm = ALPHA_FPTM_N;
415
416 if (TARGET_IEEE)
417 {
418 alpha_tp = ALPHA_TP_INSN;
419 alpha_fptm = ALPHA_FPTM_SU;
420 }
421 if (TARGET_IEEE_WITH_INEXACT)
422 {
423 alpha_tp = ALPHA_TP_INSN;
424 alpha_fptm = ALPHA_FPTM_SUI;
425 }
426
427 if (alpha_tp_string)
428 {
429 if (! strcmp (alpha_tp_string, "p"))
430 alpha_tp = ALPHA_TP_PROG;
431 else if (! strcmp (alpha_tp_string, "f"))
432 alpha_tp = ALPHA_TP_FUNC;
433 else if (! strcmp (alpha_tp_string, "i"))
434 alpha_tp = ALPHA_TP_INSN;
435 else
436 error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
437 }
438
439 if (alpha_fprm_string)
440 {
441 if (! strcmp (alpha_fprm_string, "n"))
442 alpha_fprm = ALPHA_FPRM_NORM;
443 else if (! strcmp (alpha_fprm_string, "m"))
444 alpha_fprm = ALPHA_FPRM_MINF;
445 else if (! strcmp (alpha_fprm_string, "c"))
446 alpha_fprm = ALPHA_FPRM_CHOP;
447 else if (! strcmp (alpha_fprm_string,"d"))
448 alpha_fprm = ALPHA_FPRM_DYN;
449 else
450 error ("bad value %qs for -mfp-rounding-mode switch",
451 alpha_fprm_string);
452 }
453
454 if (alpha_fptm_string)
455 {
456 if (strcmp (alpha_fptm_string, "n") == 0)
457 alpha_fptm = ALPHA_FPTM_N;
458 else if (strcmp (alpha_fptm_string, "u") == 0)
459 alpha_fptm = ALPHA_FPTM_U;
460 else if (strcmp (alpha_fptm_string, "su") == 0)
461 alpha_fptm = ALPHA_FPTM_SU;
462 else if (strcmp (alpha_fptm_string, "sui") == 0)
463 alpha_fptm = ALPHA_FPTM_SUI;
464 else
465 error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
466 }
467
468 if (alpha_cpu_string)
469 {
470 for (i = 0; i < ct_size; i++)
471 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
472 {
473 alpha_tune = alpha_cpu = cpu_table[i].processor;
474 line_size = cpu_table[i].line_size;
475 l1_size = cpu_table[i].l1_size;
476 l2_size = cpu_table[i].l2_size;
477 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
478 target_flags |= cpu_table[i].flags;
479 break;
480 }
481 if (i == ct_size)
482 error ("bad value %qs for -mcpu switch", alpha_cpu_string);
483 }
484
485 if (alpha_tune_string)
486 {
487 for (i = 0; i < ct_size; i++)
488 if (! strcmp (alpha_tune_string, cpu_table [i].name))
489 {
490 alpha_tune = cpu_table[i].processor;
491 line_size = cpu_table[i].line_size;
492 l1_size = cpu_table[i].l1_size;
493 l2_size = cpu_table[i].l2_size;
494 break;
495 }
496 if (i == ct_size)
497 error ("bad value %qs for -mtune switch", alpha_tune_string);
498 }
499
500 if (line_size)
501 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size,
502 global_options.x_param_values,
503 global_options_set.x_param_values);
504 if (l1_size)
505 maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size,
506 global_options.x_param_values,
507 global_options_set.x_param_values);
508 if (l2_size)
509 maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size,
510 global_options.x_param_values,
511 global_options_set.x_param_values);
512
513 /* Do some sanity checks on the above options. */
514
515 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
516 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
517 {
518 warning (0, "fp software completion requires -mtrap-precision=i");
519 alpha_tp = ALPHA_TP_INSN;
520 }
521
522 if (alpha_cpu == PROCESSOR_EV6)
523 {
524 /* Except for EV6 pass 1 (not released), we always have precise
525 arithmetic traps. Which means we can do software completion
526 without minding trap shadows. */
527 alpha_tp = ALPHA_TP_PROG;
528 }
529
530 if (TARGET_FLOAT_VAX)
531 {
532 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
533 {
534 warning (0, "rounding mode not supported for VAX floats");
535 alpha_fprm = ALPHA_FPRM_NORM;
536 }
537 if (alpha_fptm == ALPHA_FPTM_SUI)
538 {
539 warning (0, "trap mode not supported for VAX floats");
540 alpha_fptm = ALPHA_FPTM_SU;
541 }
542 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
543 warning (0, "128-bit long double not supported for VAX floats");
544 target_flags &= ~MASK_LONG_DOUBLE_128;
545 }
546
547 {
548 char *end;
549 int lat;
550
551 if (!alpha_mlat_string)
552 alpha_mlat_string = "L1";
553
554 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
555 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
556 ;
557 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
558 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
559 && alpha_mlat_string[2] == '\0')
560 {
561 static int const cache_latency[][4] =
562 {
563 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
564 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
565 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
566 };
567
568 lat = alpha_mlat_string[1] - '0';
569 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
570 {
571 warning (0, "L%d cache latency unknown for %s",
572 lat, alpha_cpu_name[alpha_tune]);
573 lat = 3;
574 }
575 else
576 lat = cache_latency[alpha_tune][lat-1];
577 }
578 else if (! strcmp (alpha_mlat_string, "main"))
579 {
580 /* Most current memories have about 370ns latency. This is
581 a reasonable guess for a fast cpu. */
582 lat = 150;
583 }
584 else
585 {
586 warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
587 lat = 3;
588 }
589
590 alpha_memory_latency = lat;
591 }
592
593 /* Default the definition of "small data" to 8 bytes. */
594 if (!global_options_set.x_g_switch_value)
595 g_switch_value = 8;
596
597 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
598 if (flag_pic == 1)
599 target_flags |= MASK_SMALL_DATA;
600 else if (flag_pic == 2)
601 target_flags &= ~MASK_SMALL_DATA;
602
603 /* Align labels and loops for optimal branching. */
604 /* ??? Kludge these by not doing anything if we don't optimize. */
605 if (optimize > 0)
606 {
607 if (align_loops <= 0)
608 align_loops = 16;
609 if (align_jumps <= 0)
610 align_jumps = 16;
611 }
612 if (align_functions <= 0)
613 align_functions = 16;
614
615 /* Register variables and functions with the garbage collector. */
616
617 /* Set up function hooks. */
618 init_machine_status = alpha_init_machine_status;
619
620 /* Tell the compiler when we're using VAX floating point. */
621 if (TARGET_FLOAT_VAX)
622 {
623 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
624 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
625 REAL_MODE_FORMAT (TFmode) = NULL;
626 }
627
628 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
629 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
630 target_flags |= MASK_LONG_DOUBLE_128;
631 #endif
632
633 /* This needs to be done at start up. It's convenient to do it here. */
634 register_pass (&handle_trap_shadows_info);
635 register_pass (&align_insns_info);
636 }
637 \f
638 /* Return 1 if VALUE is a mask in which every byte is all zeros or all ones. */
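/* For example, 0x00ff0000ffffff00 qualifies (every byte is 0x00 or 0xff),
   while 0x00ff000000001234 does not.  */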
639
640 int
641 zap_mask (HOST_WIDE_INT value)
642 {
643 int i;
644
645 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
646 i++, value >>= 8)
647 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
648 return 0;
649
650 return 1;
651 }
652
653 /* Return true if OP is valid for a particular TLS relocation.
654 We are already guaranteed that OP is a CONST. */
655
656 int
657 tls_symbolic_operand_1 (rtx op, int size, int unspec)
658 {
659 op = XEXP (op, 0);
660
661 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
662 return 0;
663 op = XVECEXP (op, 0, 0);
664
665 if (GET_CODE (op) != SYMBOL_REF)
666 return 0;
667
668 switch (SYMBOL_REF_TLS_MODEL (op))
669 {
670 case TLS_MODEL_LOCAL_DYNAMIC:
671 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
672 case TLS_MODEL_INITIAL_EXEC:
673 return unspec == UNSPEC_TPREL && size == 64;
674 case TLS_MODEL_LOCAL_EXEC:
675 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
676 default:
677 gcc_unreachable ();
678 }
679 }
680
681 /* Used by aligned_memory_operand and unaligned_memory_operand to
682 resolve what reload is going to do with OP if it's a register. */
683
684 rtx
685 resolve_reload_operand (rtx op)
686 {
687 if (reload_in_progress)
688 {
689 rtx tmp = op;
690 if (GET_CODE (tmp) == SUBREG)
691 tmp = SUBREG_REG (tmp);
692 if (REG_P (tmp)
693 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
694 {
695 op = reg_equiv_memory_loc (REGNO (tmp));
696 if (op == 0)
697 return 0;
698 }
699 }
700 return op;
701 }
702
703 /* The set of scalar modes supported differs from the default
704 check-what-c-supports version in that sometimes TFmode is available
705 even when long double indicates only DFmode. */
706
707 static bool
708 alpha_scalar_mode_supported_p (enum machine_mode mode)
709 {
710 switch (mode)
711 {
712 case QImode:
713 case HImode:
714 case SImode:
715 case DImode:
716 case TImode: /* via optabs.c */
717 return true;
718
719 case SFmode:
720 case DFmode:
721 return true;
722
723 case TFmode:
724 return TARGET_HAS_XFLOATING_LIBS;
725
726 default:
727 return false;
728 }
729 }
730
731 /* Alpha implements a couple of integer vector mode operations when
732 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
733 which allows the vectorizer to operate on e.g. move instructions,
734 or when expand_vector_operations can do something useful. */
735
736 static bool
737 alpha_vector_mode_supported_p (enum machine_mode mode)
738 {
739 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
740 }
741
742 /* Return 1 if this function can directly return via $26. */
743
744 int
745 direct_return (void)
746 {
747 return (TARGET_ABI_OSF
748 && reload_completed
749 && alpha_sa_size () == 0
750 && get_frame_size () == 0
751 && crtl->outgoing_args_size == 0
752 && crtl->args.pretend_args_size == 0);
753 }
754
755 /* Return the TLS model to use for SYMBOL. */
756
757 static enum tls_model
758 tls_symbolic_operand_type (rtx symbol)
759 {
760 enum tls_model model;
761
762 if (GET_CODE (symbol) != SYMBOL_REF)
763 return TLS_MODEL_NONE;
764 model = SYMBOL_REF_TLS_MODEL (symbol);
765
766 /* Local-exec with a 64-bit size is the same code as initial-exec. */
767 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
768 model = TLS_MODEL_INITIAL_EXEC;
769
770 return model;
771 }
772 \f
773 /* Return true if the function DECL will share the same GP as any
774 function in the current unit of translation. */
775
776 static bool
777 decl_has_samegp (const_tree decl)
778 {
779 /* Functions that are not local can be overridden, and thus may
780 not share the same gp. */
781 if (!(*targetm.binds_local_p) (decl))
782 return false;
783
784 /* If -msmall-data is in effect, assume that there is only one GP
785 for the module, and so any local symbol has this property. We
786 need explicit relocations to be able to enforce this for symbols
787 not defined in this unit of translation, however. */
788 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
789 return true;
790
791 /* Functions that are not external are defined in this UoT. */
792 /* ??? Irritatingly, static functions not yet emitted are still
793 marked "external". Apply this to non-static functions only. */
794 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
795 }
796
797 /* Return true if EXP should be placed in the small data section. */
798
799 static bool
800 alpha_in_small_data_p (const_tree exp)
801 {
802 /* We want to merge strings, so we never consider them small data. */
803 if (TREE_CODE (exp) == STRING_CST)
804 return false;
805
806 /* Functions are never in the small data area. Duh. */
807 if (TREE_CODE (exp) == FUNCTION_DECL)
808 return false;
809
810 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
811 {
812 const char *section = DECL_SECTION_NAME (exp);
813 if (strcmp (section, ".sdata") == 0
814 || strcmp (section, ".sbss") == 0)
815 return true;
816 }
817 else
818 {
819 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
820
821 /* If this is an incomplete type with size 0, then we can't put it
822 in sdata because it might be too big when completed. */
823 if (size > 0 && size <= g_switch_value)
824 return true;
825 }
826
827 return false;
828 }
829
830 #if TARGET_ABI_OPEN_VMS
831 static bool
832 vms_valid_pointer_mode (enum machine_mode mode)
833 {
834 return (mode == SImode || mode == DImode);
835 }
836
837 static bool
838 alpha_linkage_symbol_p (const char *symname)
839 {
840 int symlen = strlen (symname);
841
842 if (symlen > 4)
843 return strcmp (&symname [symlen - 4], "..lk") == 0;
844
845 return false;
846 }
847
848 #define LINKAGE_SYMBOL_REF_P(X) \
849 ((GET_CODE (X) == SYMBOL_REF \
850 && alpha_linkage_symbol_p (XSTR (X, 0))) \
851 || (GET_CODE (X) == CONST \
852 && GET_CODE (XEXP (X, 0)) == PLUS \
853 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
854 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
855 #endif
856
857 /* legitimate_address_p recognizes an RTL expression that is a valid
858 memory address for an instruction. The MODE argument is the
859 machine mode for the MEM expression that wants to use this address.
860
861 For Alpha, we have either a constant address or the sum of a
862 register and a constant address, or just a register. For DImode,
863 any of those forms can be surrounded with an AND that clears the
864 low-order three bits; this is an "unaligned" access. */
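/* For instance, (and:DI (plus:DI (reg:DI 16) (const_int 6)) (const_int -8))
   is the shape of address an ldq_u reference produces.  */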
865
866 static bool
867 alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
868 {
869 /* If this is an ldq_u type address, discard the outer AND. */
870 if (mode == DImode
871 && GET_CODE (x) == AND
872 && CONST_INT_P (XEXP (x, 1))
873 && INTVAL (XEXP (x, 1)) == -8)
874 x = XEXP (x, 0);
875
876 /* Discard non-paradoxical subregs. */
877 if (GET_CODE (x) == SUBREG
878 && (GET_MODE_SIZE (GET_MODE (x))
879 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
880 x = SUBREG_REG (x);
881
882 /* Unadorned general registers are valid. */
883 if (REG_P (x)
884 && (strict
885 ? STRICT_REG_OK_FOR_BASE_P (x)
886 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
887 return true;
888
889 /* Constant addresses (i.e. +/- 32k) are valid. */
890 if (CONSTANT_ADDRESS_P (x))
891 return true;
892
893 #if TARGET_ABI_OPEN_VMS
894 if (LINKAGE_SYMBOL_REF_P (x))
895 return true;
896 #endif
897
898 /* Register plus a small constant offset is valid. */
899 if (GET_CODE (x) == PLUS)
900 {
901 rtx ofs = XEXP (x, 1);
902 x = XEXP (x, 0);
903
904 /* Discard non-paradoxical subregs. */
905 if (GET_CODE (x) == SUBREG
906 && (GET_MODE_SIZE (GET_MODE (x))
907 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
908 x = SUBREG_REG (x);
909
910 if (REG_P (x))
911 {
912 if (! strict
913 && NONSTRICT_REG_OK_FP_BASE_P (x)
914 && CONST_INT_P (ofs))
915 return true;
916 if ((strict
917 ? STRICT_REG_OK_FOR_BASE_P (x)
918 : NONSTRICT_REG_OK_FOR_BASE_P (x))
919 && CONSTANT_ADDRESS_P (ofs))
920 return true;
921 }
922 }
923
924 /* If we're managing explicit relocations, LO_SUM is valid, as are small
925 data symbols. Avoid explicit relocations of modes larger than word
926 mode since e.g. $LC0+8($1) can fold around +/- 32k offset. */
927 else if (TARGET_EXPLICIT_RELOCS
928 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
929 {
930 if (small_symbolic_operand (x, Pmode))
931 return true;
932
933 if (GET_CODE (x) == LO_SUM)
934 {
935 rtx ofs = XEXP (x, 1);
936 x = XEXP (x, 0);
937
938 /* Discard non-paradoxical subregs. */
939 if (GET_CODE (x) == SUBREG
940 && (GET_MODE_SIZE (GET_MODE (x))
941 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
942 x = SUBREG_REG (x);
943
944 /* Must have a valid base register. */
945 if (! (REG_P (x)
946 && (strict
947 ? STRICT_REG_OK_FOR_BASE_P (x)
948 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
949 return false;
950
951 /* The symbol must be local. */
952 if (local_symbolic_operand (ofs, Pmode)
953 || dtp32_symbolic_operand (ofs, Pmode)
954 || tp32_symbolic_operand (ofs, Pmode))
955 return true;
956 }
957 }
958
959 return false;
960 }
961
962 /* Build the SYMBOL_REF for __tls_get_addr. */
963
964 static GTY(()) rtx tls_get_addr_libfunc;
965
966 static rtx
967 get_tls_get_addr (void)
968 {
969 if (!tls_get_addr_libfunc)
970 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
971 return tls_get_addr_libfunc;
972 }
973
974 /* Try machine-dependent ways of modifying an illegitimate address
975 to be legitimate. If we find one, return the new, valid address. */
976
977 static rtx
978 alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode)
979 {
980 HOST_WIDE_INT addend;
981
982 /* If the address is (plus reg const_int) and the CONST_INT is not a
983 valid offset, compute the high part of the constant and add it to
984 the register. Then our address is (plus temp low-part-const). */
985 if (GET_CODE (x) == PLUS
986 && REG_P (XEXP (x, 0))
987 && CONST_INT_P (XEXP (x, 1))
988 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
989 {
990 addend = INTVAL (XEXP (x, 1));
991 x = XEXP (x, 0);
992 goto split_addend;
993 }
994
995 /* If the address is (const (plus FOO const_int)), find the low-order
996 part of the CONST_INT. Then load FOO plus any high-order part of the
997 CONST_INT into a register. Our address is (plus reg low-part-const).
998 This is done to reduce the number of GOT entries. */
999 if (can_create_pseudo_p ()
1000 && GET_CODE (x) == CONST
1001 && GET_CODE (XEXP (x, 0)) == PLUS
1002 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
1003 {
1004 addend = INTVAL (XEXP (XEXP (x, 0), 1));
1005 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
1006 goto split_addend;
1007 }
1008
1009 /* If we have a (plus reg const), emit the load as in (2), then add
1010 the two registers, and finally generate (plus reg low-part-const) as
1011 our address. */
1012 if (can_create_pseudo_p ()
1013 && GET_CODE (x) == PLUS
1014 && REG_P (XEXP (x, 0))
1015 && GET_CODE (XEXP (x, 1)) == CONST
1016 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
1017 && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
1018 {
1019 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
1020 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
1021 XEXP (XEXP (XEXP (x, 1), 0), 0),
1022 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1023 goto split_addend;
1024 }
1025
1026 /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
1027 Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
1028 around +/- 32k offset. */
1029 if (TARGET_EXPLICIT_RELOCS
1030 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
1031 && symbolic_operand (x, Pmode))
1032 {
1033 rtx r0, r16, eqv, tga, tp, insn, dest, seq;
1034
1035 switch (tls_symbolic_operand_type (x))
1036 {
1037 case TLS_MODEL_NONE:
1038 break;
1039
1040 case TLS_MODEL_GLOBAL_DYNAMIC:
1041 start_sequence ();
1042
1043 r0 = gen_rtx_REG (Pmode, 0);
1044 r16 = gen_rtx_REG (Pmode, 16);
1045 tga = get_tls_get_addr ();
1046 dest = gen_reg_rtx (Pmode);
1047 seq = GEN_INT (alpha_next_sequence_number++);
1048
1049 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
1050 insn = gen_call_value_osf_tlsgd (r0, tga, seq);
1051 insn = emit_call_insn (insn);
1052 RTL_CONST_CALL_P (insn) = 1;
1053 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1054
1055 insn = get_insns ();
1056 end_sequence ();
1057
1058 emit_libcall_block (insn, dest, r0, x);
1059 return dest;
1060
1061 case TLS_MODEL_LOCAL_DYNAMIC:
1062 start_sequence ();
1063
1064 r0 = gen_rtx_REG (Pmode, 0);
1065 r16 = gen_rtx_REG (Pmode, 16);
1066 tga = get_tls_get_addr ();
1067 scratch = gen_reg_rtx (Pmode);
1068 seq = GEN_INT (alpha_next_sequence_number++);
1069
1070 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1071 insn = gen_call_value_osf_tlsldm (r0, tga, seq);
1072 insn = emit_call_insn (insn);
1073 RTL_CONST_CALL_P (insn) = 1;
1074 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1075
1076 insn = get_insns ();
1077 end_sequence ();
1078
1079 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1080 UNSPEC_TLSLDM_CALL);
1081 emit_libcall_block (insn, scratch, r0, eqv);
1082
1083 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1084 eqv = gen_rtx_CONST (Pmode, eqv);
1085
1086 if (alpha_tls_size == 64)
1087 {
1088 dest = gen_reg_rtx (Pmode);
1089 emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
1090 emit_insn (gen_adddi3 (dest, dest, scratch));
1091 return dest;
1092 }
1093 if (alpha_tls_size == 32)
1094 {
1095 insn = gen_rtx_HIGH (Pmode, eqv);
1096 insn = gen_rtx_PLUS (Pmode, scratch, insn);
1097 scratch = gen_reg_rtx (Pmode);
1098 emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
1099 }
1100 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1101
1102 case TLS_MODEL_INITIAL_EXEC:
1103 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1104 eqv = gen_rtx_CONST (Pmode, eqv);
1105 tp = gen_reg_rtx (Pmode);
1106 scratch = gen_reg_rtx (Pmode);
1107 dest = gen_reg_rtx (Pmode);
1108
1109 emit_insn (gen_get_thread_pointerdi (tp));
1110 emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
1111 emit_insn (gen_adddi3 (dest, tp, scratch));
1112 return dest;
1113
1114 case TLS_MODEL_LOCAL_EXEC:
1115 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1116 eqv = gen_rtx_CONST (Pmode, eqv);
1117 tp = gen_reg_rtx (Pmode);
1118
1119 emit_insn (gen_get_thread_pointerdi (tp));
1120 if (alpha_tls_size == 32)
1121 {
1122 insn = gen_rtx_HIGH (Pmode, eqv);
1123 insn = gen_rtx_PLUS (Pmode, tp, insn);
1124 tp = gen_reg_rtx (Pmode);
1125 emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
1126 }
1127 return gen_rtx_LO_SUM (Pmode, tp, eqv);
1128
1129 default:
1130 gcc_unreachable ();
1131 }
1132
1133 if (local_symbolic_operand (x, Pmode))
1134 {
1135 if (small_symbolic_operand (x, Pmode))
1136 return x;
1137 else
1138 {
1139 if (can_create_pseudo_p ())
1140 scratch = gen_reg_rtx (Pmode);
1141 emit_insn (gen_rtx_SET (VOIDmode, scratch,
1142 gen_rtx_HIGH (Pmode, x)));
1143 return gen_rtx_LO_SUM (Pmode, scratch, x);
1144 }
1145 }
1146 }
1147
1148 return NULL;
1149
1150 split_addend:
1151 {
1152 HOST_WIDE_INT low, high;
1153
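    /* Split the addend into a sign-extended 16-bit low part and a
       sign-extended 32-bit high part.  E.g. addend == 0x9000 splits into
       low == -0x7000 (an lda offset) and high == 0x10000 (a single ldah);
       the two parts sum back to 0x9000.  */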
1154 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1155 addend -= low;
1156 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1157 addend -= high;
1158
1159 if (addend)
1160 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1161 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1162 1, OPTAB_LIB_WIDEN);
1163 if (high)
1164 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1165 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1166 1, OPTAB_LIB_WIDEN);
1167
1168 return plus_constant (Pmode, x, low);
1169 }
1170 }
1171
1172
1173 /* Try machine-dependent ways of modifying an illegitimate address
1174 to be legitimate. Return X or the new, valid address. */
1175
1176 static rtx
1177 alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1178 enum machine_mode mode)
1179 {
1180 rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1181 return new_x ? new_x : x;
1182 }
1183
1184 /* Return true if ADDR has an effect that depends on the machine mode it
1185 is used for. On the Alpha this is true only for the unaligned modes.
1186 We can simplify the test since we know that the address must be valid. */
1187
1188 static bool
1189 alpha_mode_dependent_address_p (const_rtx addr,
1190 addr_space_t as ATTRIBUTE_UNUSED)
1191 {
1192 return GET_CODE (addr) == AND;
1193 }
1194
1195 /* Primarily this is required for TLS symbols, but given that our move
1196 patterns *ought* to be able to handle any symbol at any time, we
1197 should never be spilling symbolic operands to the constant pool, ever. */
1198
1199 static bool
1200 alpha_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1201 {
1202 enum rtx_code code = GET_CODE (x);
1203 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1204 }
1205
1206 /* We do not allow indirect calls to be optimized into sibling calls, nor
1207 can we allow a call to a function with a different GP to be optimized
1208 into a sibcall. */
1209
1210 static bool
1211 alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1212 {
1213 /* Can't do indirect tail calls, since we don't know if the target
1214 uses the same GP. */
1215 if (!decl)
1216 return false;
1217
1218 /* Otherwise, we can make a tail call if the target function shares
1219 the same GP. */
1220 return decl_has_samegp (decl);
1221 }
1222
1223 int
1224 some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED)
1225 {
1226 rtx x = *px;
1227
1228 /* Don't re-split. */
1229 if (GET_CODE (x) == LO_SUM)
1230 return -1;
1231
1232 return small_symbolic_operand (x, Pmode) != 0;
1233 }
1234
1235 static int
1236 split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
1237 {
1238 rtx x = *px;
1239
1240 /* Don't re-split. */
1241 if (GET_CODE (x) == LO_SUM)
1242 return -1;
1243
1244 if (small_symbolic_operand (x, Pmode))
1245 {
1246 x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1247 *px = x;
1248 return -1;
1249 }
1250
1251 return 0;
1252 }
1253
1254 rtx
1255 split_small_symbolic_operand (rtx x)
1256 {
1257 x = copy_insn (x);
1258 for_each_rtx (&x, split_small_symbolic_operand_1, NULL);
1259 return x;
1260 }
1261
1262 /* Indicate that INSN cannot be duplicated. This is true for any insn
1263 that we've marked with gpdisp relocs, since those have to stay in
1264 1-1 correspondence with one another.
1265
1266 Technically we could copy them if we could set up a mapping from one
1267 sequence number to another, across the set of insns to be duplicated.
1268 This seems overly complicated and error-prone since interblock motion
1269 from sched-ebb could move one of the pair of insns to a different block.
1270
1271 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1272 then they'll be in a different block from their ldgp. Which could lead
1273 the bb reorder code to think that it would be ok to copy just the block
1274 containing the call and branch to the block containing the ldgp. */
1275
1276 static bool
1277 alpha_cannot_copy_insn_p (rtx_insn *insn)
1278 {
1279 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1280 return false;
1281 if (recog_memoized (insn) >= 0)
1282 return get_attr_cannot_copy (insn);
1283 else
1284 return false;
1285 }
1286
1287
1288 /* Try a machine-dependent way of reloading an illegitimate address
1289 operand. If we find one, push the reload and return the new rtx. */
1290
1291 rtx
1292 alpha_legitimize_reload_address (rtx x,
1293 enum machine_mode mode ATTRIBUTE_UNUSED,
1294 int opnum, int type,
1295 int ind_levels ATTRIBUTE_UNUSED)
1296 {
1297 /* We must recognize output that we have already generated ourselves. */
1298 if (GET_CODE (x) == PLUS
1299 && GET_CODE (XEXP (x, 0)) == PLUS
1300 && REG_P (XEXP (XEXP (x, 0), 0))
1301 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1302 && CONST_INT_P (XEXP (x, 1)))
1303 {
1304 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1305 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1306 opnum, (enum reload_type) type);
1307 return x;
1308 }
1309
1310 /* We wish to handle large displacements off a base register by
1311 splitting the addend across an ldah and the mem insn. This
1312 cuts the number of extra insns needed from 3 to 1. */
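  /* For example, a displacement of 0x12348 reloads 0x10000 into the base
     register (one ldah) and leaves the remaining 0x2348 in the mem.  */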
1313 if (GET_CODE (x) == PLUS
1314 && REG_P (XEXP (x, 0))
1315 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1316 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1317 && GET_CODE (XEXP (x, 1)) == CONST_INT)
1318 {
1319 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1320 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1321 HOST_WIDE_INT high
1322 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1323
1324 /* Check for 32-bit overflow. */
1325 if (high + low != val)
1326 return NULL_RTX;
1327
1328 /* Reload the high part into a base reg; leave the low part
1329 in the mem directly. */
1330 x = gen_rtx_PLUS (GET_MODE (x),
1331 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1332 GEN_INT (high)),
1333 GEN_INT (low));
1334
1335 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1336 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1337 opnum, (enum reload_type) type);
1338 return x;
1339 }
1340
1341 return NULL_RTX;
1342 }
1343 \f
1344 /* Compute a (partial) cost for rtx X. Return true if the complete
1345 cost has been computed, and false if subexpressions should be
1346 scanned. In either case, *TOTAL contains the cost result. */
1347
1348 static bool
1349 alpha_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
1350 bool speed)
1351 {
1352 enum machine_mode mode = GET_MODE (x);
1353 bool float_mode_p = FLOAT_MODE_P (mode);
1354 const struct alpha_rtx_cost_data *cost_data;
1355
1356 if (!speed)
1357 cost_data = &alpha_rtx_cost_size;
1358 else
1359 cost_data = &alpha_rtx_cost_data[alpha_tune];
1360
1361 switch (code)
1362 {
1363 case CONST_INT:
1364 /* If this is an 8-bit constant, return zero since it can be used
1365 nearly anywhere with no cost. If it is a valid operand for an
1366 ADD or AND, likewise return 0 if we know it will be used in that
1367 context. Otherwise, return 2 since it might be used there later.
1368 All other constants take at least two insns. */
1369 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1370 {
1371 *total = 0;
1372 return true;
1373 }
1374 /* FALLTHRU */
1375
1376 case CONST_DOUBLE:
1377 if (x == CONST0_RTX (mode))
1378 *total = 0;
1379 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1380 || (outer_code == AND && and_operand (x, VOIDmode)))
1381 *total = 0;
1382 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1383 *total = 2;
1384 else
1385 *total = COSTS_N_INSNS (2);
1386 return true;
1387
1388 case CONST:
1389 case SYMBOL_REF:
1390 case LABEL_REF:
1391 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1392 *total = COSTS_N_INSNS (outer_code != MEM);
1393 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1394 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1395 else if (tls_symbolic_operand_type (x))
1396 /* Estimate of cost for call_pal rduniq. */
1397 /* ??? How many insns do we emit here? More than one... */
1398 *total = COSTS_N_INSNS (15);
1399 else
1400 /* Otherwise we do a load from the GOT. */
1401 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1402 return true;
1403
1404 case HIGH:
1405 /* This is effectively an add_operand. */
1406 *total = 2;
1407 return true;
1408
1409 case PLUS:
1410 case MINUS:
1411 if (float_mode_p)
1412 *total = cost_data->fp_add;
1413 else if (GET_CODE (XEXP (x, 0)) == MULT
1414 && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1415 {
1416 *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
1417 (enum rtx_code) outer_code, opno, speed)
1418 + rtx_cost (XEXP (x, 1),
1419 (enum rtx_code) outer_code, opno, speed)
1420 + COSTS_N_INSNS (1));
1421 return true;
1422 }
1423 return false;
1424
1425 case MULT:
1426 if (float_mode_p)
1427 *total = cost_data->fp_mult;
1428 else if (mode == DImode)
1429 *total = cost_data->int_mult_di;
1430 else
1431 *total = cost_data->int_mult_si;
1432 return false;
1433
1434 case ASHIFT:
1435 if (CONST_INT_P (XEXP (x, 1))
1436 && INTVAL (XEXP (x, 1)) <= 3)
1437 {
1438 *total = COSTS_N_INSNS (1);
1439 return false;
1440 }
1441 /* FALLTHRU */
1442
1443 case ASHIFTRT:
1444 case LSHIFTRT:
1445 *total = cost_data->int_shift;
1446 return false;
1447
1448 case IF_THEN_ELSE:
1449 if (float_mode_p)
1450 *total = cost_data->fp_add;
1451 else
1452 *total = cost_data->int_cmov;
1453 return false;
1454
1455 case DIV:
1456 case UDIV:
1457 case MOD:
1458 case UMOD:
1459 if (!float_mode_p)
1460 *total = cost_data->int_div;
1461 else if (mode == SFmode)
1462 *total = cost_data->fp_div_sf;
1463 else
1464 *total = cost_data->fp_div_df;
1465 return false;
1466
1467 case MEM:
1468 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1469 return true;
1470
1471 case NEG:
1472 if (! float_mode_p)
1473 {
1474 *total = COSTS_N_INSNS (1);
1475 return false;
1476 }
1477 /* FALLTHRU */
1478
1479 case ABS:
1480 if (! float_mode_p)
1481 {
1482 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1483 return false;
1484 }
1485 /* FALLTHRU */
1486
1487 case FLOAT:
1488 case UNSIGNED_FLOAT:
1489 case FIX:
1490 case UNSIGNED_FIX:
1491 case FLOAT_TRUNCATE:
1492 *total = cost_data->fp_add;
1493 return false;
1494
1495 case FLOAT_EXTEND:
1496 if (MEM_P (XEXP (x, 0)))
1497 *total = 0;
1498 else
1499 *total = cost_data->fp_add;
1500 return false;
1501
1502 default:
1503 return false;
1504 }
1505 }
1506 \f
1507 /* REF is an alignable memory location. Place an aligned SImode
1508 reference into *PALIGNED_MEM and the number of bits to shift into
1509 *PBITNUM. */
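/* For example, a QImode REF at base+6 with no better known alignment
   yields an SImode *PALIGNED_MEM at base+4 and *PBITNUM of 16.  */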
1511
1512 void
1513 get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1514 {
1515 rtx base;
1516 HOST_WIDE_INT disp, offset;
1517
1518 gcc_assert (MEM_P (ref));
1519
1520 if (reload_in_progress
1521 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1522 {
1523 base = find_replacement (&XEXP (ref, 0));
1524 gcc_assert (memory_address_p (GET_MODE (ref), base));
1525 }
1526 else
1527 base = XEXP (ref, 0);
1528
1529 if (GET_CODE (base) == PLUS)
1530 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1531 else
1532 disp = 0;
1533
1534 /* Find the byte offset within an aligned word. If the memory itself is
1535 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1536 will have examined the base register and determined it is aligned, and
1537 thus displacements from it are naturally alignable. */
1538 if (MEM_ALIGN (ref) >= 32)
1539 offset = 0;
1540 else
1541 offset = disp & 3;
1542
1543 /* The location should not cross aligned word boundary. */
1544 gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1545 <= GET_MODE_SIZE (SImode));
1546
1547 /* Access the entire aligned word. */
1548 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1549
1550 /* Convert the byte offset within the word to a bit offset. */
1551 offset *= BITS_PER_UNIT;
1552 *pbitnum = GEN_INT (offset);
1553 }
1554
1555 /* Similar to get_aligned_mem, but just return the address. Handle
1556 the two reload cases. */
1557
1558 rtx
1559 get_unaligned_address (rtx ref)
1560 {
1561 rtx base;
1562 HOST_WIDE_INT offset = 0;
1563
1564 gcc_assert (MEM_P (ref));
1565
1566 if (reload_in_progress
1567 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1568 {
1569 base = find_replacement (&XEXP (ref, 0));
1570
1571 gcc_assert (memory_address_p (GET_MODE (ref), base));
1572 }
1573 else
1574 base = XEXP (ref, 0);
1575
1576 if (GET_CODE (base) == PLUS)
1577 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1578
1579 return plus_constant (Pmode, base, offset);
1580 }
1581
1582 /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1583 X is always returned in a register. */
1584
1585 rtx
1586 get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1587 {
1588 if (GET_CODE (addr) == PLUS)
1589 {
1590 ofs += INTVAL (XEXP (addr, 1));
1591 addr = XEXP (addr, 0);
1592 }
1593
1594 return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1595 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1596 }
1597
1598 /* On the Alpha, all (non-symbolic) constants except zero go into
1599 a floating-point register via memory. Note that we cannot
1600 return anything that is not a subset of RCLASS, and that some
1601 symbolic constants cannot be dropped to memory. */
1602
1603 enum reg_class
1604 alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1605 {
1606 /* Zero is present in any register class. */
1607 if (x == CONST0_RTX (GET_MODE (x)))
1608 return rclass;
1609
1610 /* These sorts of constants we can easily drop to memory. */
1611 if (CONST_INT_P (x)
1612 || GET_CODE (x) == CONST_DOUBLE
1613 || GET_CODE (x) == CONST_VECTOR)
1614 {
1615 if (rclass == FLOAT_REGS)
1616 return NO_REGS;
1617 if (rclass == ALL_REGS)
1618 return GENERAL_REGS;
1619 return rclass;
1620 }
1621
1622 /* All other kinds of constants should not (and in the case of HIGH
1623 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1624 secondary reload. */
1625 if (CONSTANT_P (x))
1626 return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1627
1628 return rclass;
1629 }
1630
1631 /* Inform reload about cases where moving X with a mode MODE to a register in
1632 RCLASS requires an extra scratch or immediate register. Return the class
1633 needed for the immediate register. */
1634
1635 static reg_class_t
1636 alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1637 enum machine_mode mode, secondary_reload_info *sri)
1638 {
1639 enum reg_class rclass = (enum reg_class) rclass_i;
1640
1641 /* Loading and storing HImode or QImode values to and from memory
1642 usually requires a scratch register. */
1643 if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1644 {
1645 if (any_memory_operand (x, mode))
1646 {
1647 if (in_p)
1648 {
1649 if (!aligned_memory_operand (x, mode))
1650 sri->icode = direct_optab_handler (reload_in_optab, mode);
1651 }
1652 else
1653 sri->icode = direct_optab_handler (reload_out_optab, mode);
1654 return NO_REGS;
1655 }
1656 }
1657
1658 /* We also cannot do integral arithmetic into FP regs, as might result
1659 from register elimination into a DImode fp register. */
1660 if (rclass == FLOAT_REGS)
1661 {
1662 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1663 return GENERAL_REGS;
1664 if (in_p && INTEGRAL_MODE_P (mode)
1665 && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1666 return GENERAL_REGS;
1667 }
1668
1669 return NO_REGS;
1670 }
1671 \f
1672 /* Subfunction of the following function. Update the flags of any MEM
1673 found in part of X. */
1674
1675 static int
1676 alpha_set_memflags_1 (rtx *xp, void *data)
1677 {
1678 rtx x = *xp, orig = (rtx) data;
1679
1680 if (!MEM_P (x))
1681 return 0;
1682
1683 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig);
1684 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig);
1685 MEM_READONLY_P (x) = MEM_READONLY_P (orig);
1686
1687 /* Sadly, we cannot use alias sets because the extra aliasing
1688 produced by the AND interferes. Given that two-byte quantities
1689 are the only thing we would be able to differentiate anyway,
1690 there does not seem to be any point in convoluting the early
1691 out of the alias check. */
1692
1693 return -1;
1694 }
1695
1696 /* Given SEQ, which is an INSN list, look for any MEMs in either
1697 a SET_DEST or a SET_SRC and copy the volatile, notrap, and
1698 readonly flags from REF into each of the MEMs found. If REF is not
1699 a MEM, don't do anything. */
1700
1701 void
1702 alpha_set_memflags (rtx seq, rtx ref)
1703 {
1704 rtx_insn *insn;
1705
1706 if (!MEM_P (ref))
1707 return;
1708
1709 /* This is only called from alpha.md, after having had something
1710 generated from one of the insn patterns. So if everything is
1711 zero, the pattern is already up-to-date. */
1712 if (!MEM_VOLATILE_P (ref)
1713 && !MEM_NOTRAP_P (ref)
1714 && !MEM_READONLY_P (ref))
1715 return;
1716
1717 for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn))
1718 if (INSN_P (insn))
1719 for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref);
1720 else
1721 gcc_unreachable ();
1722 }
1723 \f
1724 static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT,
1725 int, bool);
1726
1727 /* Internal routine for alpha_emit_set_const to check for N or below insns.
1728 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1729 and return pc_rtx if successful. */
1730
1731 static rtx
1732 alpha_emit_set_const_1 (rtx target, enum machine_mode mode,
1733 HOST_WIDE_INT c, int n, bool no_output)
1734 {
1735 HOST_WIDE_INT new_const;
1736 int i, bits;
1737 /* Use a pseudo if highly optimizing and still generating RTL. */
1738 rtx subtarget
1739 = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1740 rtx temp, insn;
1741
1742 /* If this is a sign-extended 32-bit constant, we can do this in at most
1743 three insns, so do it if we have enough insns left. We always have
1744 a sign-extended 32-bit constant when compiling on a narrow machine. */
1745
1746 if (HOST_BITS_PER_WIDE_INT != 64
1747 || c >> 31 == -1 || c >> 31 == 0)
1748 {
1749 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1750 HOST_WIDE_INT tmp1 = c - low;
1751 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1752 HOST_WIDE_INT extra = 0;
1753
1754 /* If HIGH will be interpreted as negative but the constant is
1755 positive, we must adjust it to do two ldah insns. */
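      /* E.g. c == 0x7fff8000 first gives high == -0x8000; setting EXTRA to
         0x4000 rebuilds the value as two positive ldah parts of 0x4000 each
         plus the lda of low == -0x8000.  */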
1756
1757 if ((high & 0x8000) != 0 && c >= 0)
1758 {
1759 extra = 0x4000;
1760 tmp1 -= 0x40000000;
1761 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1762 }
1763
1764 if (c == low || (low == 0 && extra == 0))
1765 {
1766 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1767 but that meant that we can't handle INT_MIN on 32-bit machines
1768 (like NT/Alpha), because we recurse indefinitely through
1769 emit_move_insn to gen_movdi. So instead, since we know exactly
1770 what we want, create it explicitly. */
1771
1772 if (no_output)
1773 return pc_rtx;
1774 if (target == NULL)
1775 target = gen_reg_rtx (mode);
1776 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1777 return target;
1778 }
1779 else if (n >= 2 + (extra != 0))
1780 {
1781 if (no_output)
1782 return pc_rtx;
1783 if (!can_create_pseudo_p ())
1784 {
1785 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
1786 temp = target;
1787 }
1788 else
1789 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1790 subtarget, mode);
1791
1792 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1793 This means that if we go through expand_binop, we'll try to
1794 generate extensions, etc, which will require new pseudos, which
1795 will fail during some split phases. The SImode add patterns
1796 still exist, but are not named. So build the insns by hand. */
1797
1798 if (extra != 0)
1799 {
1800 if (! subtarget)
1801 subtarget = gen_reg_rtx (mode);
1802 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1803 insn = gen_rtx_SET (VOIDmode, subtarget, insn);
1804 emit_insn (insn);
1805 temp = subtarget;
1806 }
1807
1808 if (target == NULL)
1809 target = gen_reg_rtx (mode);
1810 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1811 insn = gen_rtx_SET (VOIDmode, target, insn);
1812 emit_insn (insn);
1813 return target;
1814 }
1815 }
1816
1817 /* If we couldn't do it that way, try some other methods. But if we have
1818 no instructions left, don't bother. Likewise, if this is SImode and
1819 we can't make pseudos, we can't do anything since the expand_binop
1820 and expand_unop calls will widen and try to make pseudos. */
1821
1822 if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1823 return 0;
1824
1825 /* Next, see if we can load a related constant and then shift and possibly
1826 negate it to get the constant we want. Try this once each increasing
1827 numbers of insns. */
1828
1829 for (i = 1; i < n; i++)
1830 {
1831 /* First, see if, minus some low bits, we have an easy load of
1832 the high bits. */
1833
1834 new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1835 if (new_const != 0)
1836 {
1837 temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1838 if (temp)
1839 {
1840 if (no_output)
1841 return temp;
1842 return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1843 target, 0, OPTAB_WIDEN);
1844 }
1845 }
1846
1847 /* Next try complementing. */
1848 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1849 if (temp)
1850 {
1851 if (no_output)
1852 return temp;
1853 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1854 }
1855
1856 /* Next try to form a constant and do a left shift. We can do this
1857 if some low-order bits are zero; the exact_log2 call below tells
1858 us that information. The bits we are shifting out could be any
1859 value, but here we'll just try the 0- and sign-extended forms of
1860 the constant. To try to increase the chance of having the same
1861 constant in more than one insn, start at the highest number of
1862 bits to shift, but try all possibilities in case a ZAPNOT will
1863 be useful. */
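      /* For example, c == 0x1234500000 has 20 low-order zero bits; 0x12345
         can be loaded with an ldah/lda pair and then shifted left by 20.  */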
1864
1865 bits = exact_log2 (c & -c);
1866 if (bits > 0)
1867 for (; bits > 0; bits--)
1868 {
1869 new_const = c >> bits;
1870 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1871 if (!temp && c < 0)
1872 {
1873 new_const = (unsigned HOST_WIDE_INT)c >> bits;
1874 temp = alpha_emit_set_const (subtarget, mode, new_const,
1875 i, no_output);
1876 }
1877 if (temp)
1878 {
1879 if (no_output)
1880 return temp;
1881 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1882 target, 0, OPTAB_WIDEN);
1883 }
1884 }
1885
1886 /* Now try high-order zero bits. Here we try the shifted-in bits as
1887 all zero and all ones. Be careful to avoid shifting outside the
1888 mode and to avoid shifting outside the host wide int size. */
1889 /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1890 confuse the recursive call and set all of the high 32 bits. */
1891
1892 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1893 - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
1894 if (bits > 0)
1895 for (; bits > 0; bits--)
1896 {
1897 new_const = c << bits;
1898 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1899 if (!temp)
1900 {
1901 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1902 temp = alpha_emit_set_const (subtarget, mode, new_const,
1903 i, no_output);
1904 }
1905 if (temp)
1906 {
1907 if (no_output)
1908 return temp;
1909 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1910 target, 1, OPTAB_WIDEN);
1911 }
1912 }
1913
1914 /* Now try high-order 1 bits. We get that with a sign-extension.
1915 But one bit isn't enough here. Be careful to avoid shifting outside
1916 the mode and to avoid shifting outside the host wide int size. */
1917
1918 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1919 - floor_log2 (~ c) - 2);
1920 if (bits > 0)
1921 for (; bits > 0; bits--)
1922 {
1923 new_const = c << bits;
1924 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1925 if (!temp)
1926 {
1927 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1928 temp = alpha_emit_set_const (subtarget, mode, new_const,
1929 i, no_output);
1930 }
1931 if (temp)
1932 {
1933 if (no_output)
1934 return temp;
1935 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1936 target, 0, OPTAB_WIDEN);
1937 }
1938 }
1939 }
1940
1941 #if HOST_BITS_PER_WIDE_INT == 64
1942   /* Finally, see if we can load a value into the target that is the same as the
1943 constant except that all bytes that are 0 are changed to be 0xff. If we
1944 can, then we can do a ZAPNOT to obtain the desired constant. */
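  /* For instance (an illustrative, hand-worked case): c = 0x000000ffffffffff
     has its three high bytes zero, so new_const becomes -1, which loads in a
     single insn; the AND below then uses the byte mask 0x000000ffffffffff,
     which is a single ZAPNOT, for two insns in total.  */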
1945
1946 new_const = c;
1947 for (i = 0; i < 64; i += 8)
1948 if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1949 new_const |= (HOST_WIDE_INT) 0xff << i;
1950
1951 /* We are only called for SImode and DImode. If this is SImode, ensure that
1952 we are sign extended to a full word. */
1953
1954 if (mode == SImode)
1955 new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1956
1957 if (new_const != c)
1958 {
1959 temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1960 if (temp)
1961 {
1962 if (no_output)
1963 return temp;
1964 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1965 target, 0, OPTAB_WIDEN);
1966 }
1967 }
1968 #endif
1969
1970 return 0;
1971 }
1972
1973 /* Try to output insns to set TARGET equal to the constant C if it can be
1974 done in less than N insns. Do all computations in MODE. Returns the place
1975 where the output has been placed if it can be done and the insns have been
1976 emitted. If it would take more than N insns, zero is returned and no
1977    insns are emitted.  */
1978
1979 static rtx
1980 alpha_emit_set_const (rtx target, enum machine_mode mode,
1981 HOST_WIDE_INT c, int n, bool no_output)
1982 {
1983 enum machine_mode orig_mode = mode;
1984 rtx orig_target = target;
1985 rtx result = 0;
1986 int i;
1987
1988 /* If we can't make any pseudos, TARGET is an SImode hard register, we
1989 can't load this constant in one insn, do this in DImode. */
1990 if (!can_create_pseudo_p () && mode == SImode
1991 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
1992 {
1993 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
1994 if (result)
1995 return result;
1996
1997 target = no_output ? NULL : gen_lowpart (DImode, target);
1998 mode = DImode;
1999 }
2000 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2001 {
2002 target = no_output ? NULL : gen_lowpart (DImode, target);
2003 mode = DImode;
2004 }
2005
2006 /* Try 1 insn, then 2, then up to N. */
2007 for (i = 1; i <= n; i++)
2008 {
2009 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2010 if (result)
2011 {
2012 rtx_insn *insn;
2013 rtx set;
2014
2015 if (no_output)
2016 return result;
2017
2018 insn = get_last_insn ();
2019 set = single_set (insn);
2020 if (! CONSTANT_P (SET_SRC (set)))
2021 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2022 break;
2023 }
2024 }
2025
2026 /* Allow for the case where we changed the mode of TARGET. */
2027 if (result)
2028 {
2029 if (result == target)
2030 result = orig_target;
2031 else if (mode != orig_mode)
2032 result = gen_lowpart (orig_mode, result);
2033 }
2034
2035 return result;
2036 }
2037
2038 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
2039    fall back to a straightforward decomposition.  We do this to avoid
2040 exponential run times encountered when looking for longer sequences
2041 with alpha_emit_set_const. */
2042
2043 static rtx
2044 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
2045 {
2046 HOST_WIDE_INT d1, d2, d3, d4;
2047
2048 /* Decompose the entire word */
2049 #if HOST_BITS_PER_WIDE_INT >= 64
2050 gcc_assert (c2 == -(c1 < 0));
2051 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2052 c1 -= d1;
2053 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2054 c1 = (c1 - d2) >> 32;
2055 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2056 c1 -= d3;
2057 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2058 gcc_assert (c1 == d4);
2059 #else
2060 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2061 c1 -= d1;
2062 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2063 gcc_assert (c1 == d2);
2064 c2 += (d2 < 0);
2065 d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
2066 c2 -= d3;
2067 d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2068 gcc_assert (c2 == d4);
2069 #endif
2070
2071 /* Construct the high word */
2072 if (d4)
2073 {
2074 emit_move_insn (target, GEN_INT (d4));
2075 if (d3)
2076 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2077 }
2078 else
2079 emit_move_insn (target, GEN_INT (d3));
2080
2081 /* Shift it into place */
2082 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2083
2084 /* Add in the low bits. */
2085 if (d2)
2086 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2087 if (d1)
2088 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2089
2090 return target;
2091 }
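
/* As a hand-worked illustration of the decomposition above (a sketch only;
   the function emits RTL, and the final assembly depends on later passes),
   c = 0x123456789abcdef0 splits into d4 = 0x12340000, d3 = 0x5679,
   d2 = -0x65430000 and d1 = -0x2110, which corresponds roughly to:

	ldah	$r,0x1234($31)
	lda	$r,0x5679($r)
	sll	$r,32,$r
	ldah	$r,-0x6543($r)
	lda	$r,-0x2110($r)  */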
2092
2093 /* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, store
2094    the low 64 bits of the value in *P0 and *P1.  */
2095
2096 static void
2097 alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1)
2098 {
2099 HOST_WIDE_INT i0, i1;
2100
2101 if (GET_CODE (x) == CONST_VECTOR)
2102 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2103
2104
2105 if (CONST_INT_P (x))
2106 {
2107 i0 = INTVAL (x);
2108 i1 = -(i0 < 0);
2109 }
2110 else if (HOST_BITS_PER_WIDE_INT >= 64)
2111 {
2112 i0 = CONST_DOUBLE_LOW (x);
2113 i1 = -(i0 < 0);
2114 }
2115 else
2116 {
2117 i0 = CONST_DOUBLE_LOW (x);
2118 i1 = CONST_DOUBLE_HIGH (x);
2119 }
2120
2121 *p0 = i0;
2122 *p1 = i1;
2123 }
2124
2125 /* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which
2126 we are willing to load the value into a register via a move pattern.
2127 Normally this is all symbolic constants, integral constants that
2128 take three or fewer instructions, and floating-point zero. */
2129
2130 bool
2131 alpha_legitimate_constant_p (enum machine_mode mode, rtx x)
2132 {
2133 HOST_WIDE_INT i0, i1;
2134
2135 switch (GET_CODE (x))
2136 {
2137 case LABEL_REF:
2138 case HIGH:
2139 return true;
2140
2141 case CONST:
2142 if (GET_CODE (XEXP (x, 0)) == PLUS
2143 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2144 x = XEXP (XEXP (x, 0), 0);
2145 else
2146 return true;
2147
2148 if (GET_CODE (x) != SYMBOL_REF)
2149 return true;
2150
2151 /* FALLTHRU */
2152
2153 case SYMBOL_REF:
2154 /* TLS symbols are never valid. */
2155 return SYMBOL_REF_TLS_MODEL (x) == 0;
2156
2157 case CONST_DOUBLE:
2158 if (x == CONST0_RTX (mode))
2159 return true;
2160 if (FLOAT_MODE_P (mode))
2161 return false;
2162 goto do_integer;
2163
2164 case CONST_VECTOR:
2165 if (x == CONST0_RTX (mode))
2166 return true;
2167 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2168 return false;
2169 if (GET_MODE_SIZE (mode) != 8)
2170 return false;
2171 goto do_integer;
2172
2173 case CONST_INT:
2174 do_integer:
2175 if (TARGET_BUILD_CONSTANTS)
2176 return true;
2177 alpha_extract_integer (x, &i0, &i1);
2178       if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2179 return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL;
2180 return false;
2181
2182 default:
2183 return false;
2184 }
2185 }
2186
2187 /* Operand 1 is known to be a constant, and should require more than one
2188 instruction to load. Emit that multi-part load. */
2189
2190 bool
2191 alpha_split_const_mov (enum machine_mode mode, rtx *operands)
2192 {
2193 HOST_WIDE_INT i0, i1;
2194 rtx temp = NULL_RTX;
2195
2196 alpha_extract_integer (operands[1], &i0, &i1);
2197
2198 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2199 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2200
2201 if (!temp && TARGET_BUILD_CONSTANTS)
2202 temp = alpha_emit_set_long_const (operands[0], i0, i1);
2203
2204 if (temp)
2205 {
2206 if (!rtx_equal_p (operands[0], temp))
2207 emit_move_insn (operands[0], temp);
2208 return true;
2209 }
2210
2211 return false;
2212 }
2213
2214 /* Expand a move instruction; return true if all work is done.
2215 We don't handle non-bwx subword loads here. */
2216
2217 bool
2218 alpha_expand_mov (enum machine_mode mode, rtx *operands)
2219 {
2220 rtx tmp;
2221
2222 /* If the output is not a register, the input must be. */
2223 if (MEM_P (operands[0])
2224 && ! reg_or_0_operand (operands[1], mode))
2225 operands[1] = force_reg (mode, operands[1]);
2226
2227 /* Allow legitimize_address to perform some simplifications. */
2228 if (mode == Pmode && symbolic_operand (operands[1], mode))
2229 {
2230 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2231 if (tmp)
2232 {
2233 if (tmp == operands[0])
2234 return true;
2235 operands[1] = tmp;
2236 return false;
2237 }
2238 }
2239
2240 /* Early out for non-constants and valid constants. */
2241 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2242 return false;
2243
2244 /* Split large integers. */
2245 if (CONST_INT_P (operands[1])
2246 || GET_CODE (operands[1]) == CONST_DOUBLE
2247 || GET_CODE (operands[1]) == CONST_VECTOR)
2248 {
2249 if (alpha_split_const_mov (mode, operands))
2250 return true;
2251 }
2252
2253 /* Otherwise we've nothing left but to drop the thing to memory. */
2254 tmp = force_const_mem (mode, operands[1]);
2255
2256 if (tmp == NULL_RTX)
2257 return false;
2258
2259 if (reload_in_progress)
2260 {
2261 emit_move_insn (operands[0], XEXP (tmp, 0));
2262 operands[1] = replace_equiv_address (tmp, operands[0]);
2263 }
2264 else
2265 operands[1] = validize_mem (tmp);
2266 return false;
2267 }
2268
2269 /* Expand a non-bwx QImode or HImode move instruction;
2270 return true if all work is done. */
2271
2272 bool
2273 alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands)
2274 {
2275 rtx seq;
2276
2277 /* If the output is not a register, the input must be. */
2278 if (MEM_P (operands[0]))
2279 operands[1] = force_reg (mode, operands[1]);
2280
2281 /* Handle four memory cases, unaligned and aligned for either the input
2282 or the output. The only case where we can be called during reload is
2283 for aligned loads; all other cases require temporaries. */
2284
2285 if (any_memory_operand (operands[1], mode))
2286 {
2287 if (aligned_memory_operand (operands[1], mode))
2288 {
2289 if (reload_in_progress)
2290 {
2291 if (mode == QImode)
2292 seq = gen_reload_inqi_aligned (operands[0], operands[1]);
2293 else
2294 seq = gen_reload_inhi_aligned (operands[0], operands[1]);
2295 emit_insn (seq);
2296 }
2297 else
2298 {
2299 rtx aligned_mem, bitnum;
2300 rtx scratch = gen_reg_rtx (SImode);
2301 rtx subtarget;
2302 bool copyout;
2303
2304 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2305
2306 subtarget = operands[0];
2307 if (REG_P (subtarget))
2308 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2309 else
2310 subtarget = gen_reg_rtx (DImode), copyout = true;
2311
2312 if (mode == QImode)
2313 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2314 bitnum, scratch);
2315 else
2316 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2317 bitnum, scratch);
2318 emit_insn (seq);
2319
2320 if (copyout)
2321 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2322 }
2323 }
2324 else
2325 {
2326 /* Don't pass these as parameters since that makes the generated
2327 code depend on parameter evaluation order which will cause
2328 bootstrap failures. */
2329
2330 rtx temp1, temp2, subtarget, ua;
2331 bool copyout;
2332
2333 temp1 = gen_reg_rtx (DImode);
2334 temp2 = gen_reg_rtx (DImode);
2335
2336 subtarget = operands[0];
2337 if (REG_P (subtarget))
2338 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2339 else
2340 subtarget = gen_reg_rtx (DImode), copyout = true;
2341
2342 ua = get_unaligned_address (operands[1]);
2343 if (mode == QImode)
2344 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2345 else
2346 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2347
2348 alpha_set_memflags (seq, operands[1]);
2349 emit_insn (seq);
2350
2351 if (copyout)
2352 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2353 }
2354 return true;
2355 }
2356
2357 if (any_memory_operand (operands[0], mode))
2358 {
2359 if (aligned_memory_operand (operands[0], mode))
2360 {
2361 rtx aligned_mem, bitnum;
2362 rtx temp1 = gen_reg_rtx (SImode);
2363 rtx temp2 = gen_reg_rtx (SImode);
2364
2365 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2366
2367 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2368 temp1, temp2));
2369 }
2370 else
2371 {
2372 rtx temp1 = gen_reg_rtx (DImode);
2373 rtx temp2 = gen_reg_rtx (DImode);
2374 rtx temp3 = gen_reg_rtx (DImode);
2375 rtx ua = get_unaligned_address (operands[0]);
2376
2377 if (mode == QImode)
2378 seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2379 else
2380 seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2381
2382 alpha_set_memflags (seq, operands[0]);
2383 emit_insn (seq);
2384 }
2385 return true;
2386 }
2387
2388 return false;
2389 }
2390
2391 /* Implement the movmisalign patterns. One of the operands is a memory
2392 that is not naturally aligned. Emit instructions to load it. */
2393
2394 void
2395 alpha_expand_movmisalign (enum machine_mode mode, rtx *operands)
2396 {
2397 /* Honor misaligned loads, for those we promised to do so. */
2398 if (MEM_P (operands[1]))
2399 {
2400 rtx tmp;
2401
2402 if (register_operand (operands[0], mode))
2403 tmp = operands[0];
2404 else
2405 tmp = gen_reg_rtx (mode);
2406
2407 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2408 if (tmp != operands[0])
2409 emit_move_insn (operands[0], tmp);
2410 }
2411 else if (MEM_P (operands[0]))
2412 {
2413 if (!reg_or_0_operand (operands[1], mode))
2414 operands[1] = force_reg (mode, operands[1]);
2415 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2416 }
2417 else
2418 gcc_unreachable ();
2419 }
2420
2421 /* Generate an unsigned DImode to FP conversion. This is the same code
2422 optabs would emit if we didn't have TFmode patterns.
2423
2424 For SFmode, this is the only construction I've found that can pass
2425 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2426 intermediates will work, because you'll get intermediate rounding
2427 that ruins the end result. Some of this could be fixed by turning
2428 on round-to-positive-infinity, but that requires diddling the fpsr,
2429 which kills performance. I tried turning this around and converting
2430 to a negative number, so that I could turn on /m, but either I did
2431    it wrong or something else is going on, because I wound up with the
2432    exact same single-bit error.  There is a branch-less form of this same code:
2433
2434 srl $16,1,$1
2435 and $16,1,$2
2436 cmplt $16,0,$3
2437 or $1,$2,$2
2438 cmovge $16,$16,$2
2439 itoft $3,$f10
2440 itoft $2,$f11
2441 cvtqs $f11,$f11
2442 adds $f11,$f11,$f0
2443 fcmoveq $f10,$f11,$f0
2444
2445 I'm not using it because it's the same number of instructions as
2446 this branch-full form, and it has more serialized long latency
2447 instructions on the critical path.
2448
2449 For DFmode, we can avoid rounding errors by breaking up the word
2450 into two pieces, converting them separately, and adding them back:
2451
2452 LC0: .long 0,0x5f800000
2453
2454 itoft $16,$f11
2455 lda $2,LC0
2456 cmplt $16,0,$1
2457 cpyse $f11,$f31,$f10
2458 cpyse $f31,$f11,$f11
2459 s4addq $1,$2,$1
2460 lds $f12,0($1)
2461 cvtqt $f10,$f10
2462 cvtqt $f11,$f11
2463 addt $f12,$f10,$f0
2464 addt $f0,$f11,$f0
2465
2466 This doesn't seem to be a clear-cut win over the optabs form.
2467 It probably all depends on the distribution of numbers being
2468    converted -- in the optabs form, everything except the high-bit-set
2469    case has a much lower minimum execution time.  */
2470
2471 void
2472 alpha_emit_floatuns (rtx operands[2])
2473 {
2474 rtx neglab, donelab, i0, i1, f0, in, out;
2475 enum machine_mode mode;
2476
2477 out = operands[0];
2478 in = force_reg (DImode, operands[1]);
2479 mode = GET_MODE (out);
2480 neglab = gen_label_rtx ();
2481 donelab = gen_label_rtx ();
2482 i0 = gen_reg_rtx (DImode);
2483 i1 = gen_reg_rtx (DImode);
2484 f0 = gen_reg_rtx (mode);
2485
2486 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2487
2488 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
2489 emit_jump_insn (gen_jump (donelab));
2490 emit_barrier ();
2491
2492 emit_label (neglab);
2493
2494 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2495 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2496 emit_insn (gen_iordi3 (i0, i0, i1));
2497 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
2498 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
2499
2500 emit_label (donelab);
2501 }
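
/* A minimal C-level sketch of the logic emitted above for the DFmode case
   (illustrative only; the function emits RTL, not source code, and the name
   floatuns here is just for illustration).  The (x >> 1) | (x & 1) step keeps
   the discarded low bit as a sticky bit so the final rounding is correct:

     double floatuns (unsigned long x)
     {
       if ((long) x >= 0)
	 return (double) (long) x;
       unsigned long h = (x >> 1) | (x & 1);
       double f = (double) (long) h;
       return f + f;
     }
*/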
2502
2503 /* Generate the comparison for a conditional branch. */
2504
2505 void
2506 alpha_emit_conditional_branch (rtx operands[], enum machine_mode cmp_mode)
2507 {
2508 enum rtx_code cmp_code, branch_code;
2509 enum machine_mode branch_mode = VOIDmode;
2510 enum rtx_code code = GET_CODE (operands[0]);
2511 rtx op0 = operands[1], op1 = operands[2];
2512 rtx tem;
2513
2514 if (cmp_mode == TFmode)
2515 {
2516 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2517 op1 = const0_rtx;
2518 cmp_mode = DImode;
2519 }
2520
2521 /* The general case: fold the comparison code to the types of compares
2522 that we have, choosing the branch as necessary. */
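  /* For example (illustrative): an integer x > y branch folds to
     "cmple x,y,t ; beq t,label", while a DFmode x > y branch instead
     swaps the operands and becomes "cmptlt y,x,f ; fbne f,label".  */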
2523 switch (code)
2524 {
2525 case EQ: case LE: case LT: case LEU: case LTU:
2526 case UNORDERED:
2527 /* We have these compares. */
2528 cmp_code = code, branch_code = NE;
2529 break;
2530
2531 case NE:
2532 case ORDERED:
2533 /* These must be reversed. */
2534 cmp_code = reverse_condition (code), branch_code = EQ;
2535 break;
2536
2537 case GE: case GT: case GEU: case GTU:
2538 /* For FP, we swap them, for INT, we reverse them. */
2539 if (cmp_mode == DFmode)
2540 {
2541 cmp_code = swap_condition (code);
2542 branch_code = NE;
2543 tem = op0, op0 = op1, op1 = tem;
2544 }
2545 else
2546 {
2547 cmp_code = reverse_condition (code);
2548 branch_code = EQ;
2549 }
2550 break;
2551
2552 default:
2553 gcc_unreachable ();
2554 }
2555
2556 if (cmp_mode == DFmode)
2557 {
2558 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2559 {
2560 /* When we are not as concerned about non-finite values, and we
2561 are comparing against zero, we can branch directly. */
2562 if (op1 == CONST0_RTX (DFmode))
2563 cmp_code = UNKNOWN, branch_code = code;
2564 else if (op0 == CONST0_RTX (DFmode))
2565 {
2566 /* Undo the swap we probably did just above. */
2567 tem = op0, op0 = op1, op1 = tem;
2568 branch_code = swap_condition (cmp_code);
2569 cmp_code = UNKNOWN;
2570 }
2571 }
2572 else
2573 {
2574 /* ??? We mark the branch mode to be CCmode to prevent the
2575 compare and branch from being combined, since the compare
2576 insn follows IEEE rules that the branch does not. */
2577 branch_mode = CCmode;
2578 }
2579 }
2580 else
2581 {
2582 /* The following optimizations are only for signed compares. */
2583 if (code != LEU && code != LTU && code != GEU && code != GTU)
2584 {
2585 /* Whee. Compare and branch against 0 directly. */
2586 if (op1 == const0_rtx)
2587 cmp_code = UNKNOWN, branch_code = code;
2588
2589	  /* If the constant doesn't fit into an immediate, but can
2590 be generated by lda/ldah, we adjust the argument and
2591 compare against zero, so we can use beq/bne directly. */
2592 /* ??? Don't do this when comparing against symbols, otherwise
2593 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2594 be declared false out of hand (at least for non-weak). */
2595 else if (CONST_INT_P (op1)
2596 && (code == EQ || code == NE)
2597 && !(symbolic_operand (op0, VOIDmode)
2598 || (REG_P (op0) && REG_POINTER (op0))))
2599 {
2600 rtx n_op1 = GEN_INT (-INTVAL (op1));
2601
2602 if (! satisfies_constraint_I (op1)
2603 && (satisfies_constraint_K (n_op1)
2604 || satisfies_constraint_L (n_op1)))
2605 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2606 }
2607 }
2608
2609 if (!reg_or_0_operand (op0, DImode))
2610 op0 = force_reg (DImode, op0);
2611 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2612 op1 = force_reg (DImode, op1);
2613 }
2614
2615 /* Emit an initial compare instruction, if necessary. */
2616 tem = op0;
2617 if (cmp_code != UNKNOWN)
2618 {
2619 tem = gen_reg_rtx (cmp_mode);
2620 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2621 }
2622
2623 /* Emit the branch instruction. */
2624 tem = gen_rtx_SET (VOIDmode, pc_rtx,
2625 gen_rtx_IF_THEN_ELSE (VOIDmode,
2626 gen_rtx_fmt_ee (branch_code,
2627 branch_mode, tem,
2628 CONST0_RTX (cmp_mode)),
2629 gen_rtx_LABEL_REF (VOIDmode,
2630 operands[3]),
2631 pc_rtx));
2632 emit_jump_insn (tem);
2633 }
2634
2635 /* Certain simplifications can be done to make invalid setcc operations
2636    valid.  Return true if we emitted the setcc, false if we can't make it work.  */
2637
2638 bool
2639 alpha_emit_setcc (rtx operands[], enum machine_mode cmp_mode)
2640 {
2641 enum rtx_code cmp_code;
2642 enum rtx_code code = GET_CODE (operands[1]);
2643 rtx op0 = operands[2], op1 = operands[3];
2644 rtx tmp;
2645
2646 if (cmp_mode == TFmode)
2647 {
2648 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2649 op1 = const0_rtx;
2650 cmp_mode = DImode;
2651 }
2652
2653 if (cmp_mode == DFmode && !TARGET_FIX)
2654 return 0;
2655
2656 /* The general case: fold the comparison code to the types of compares
2657 that we have, choosing the branch as necessary. */
2658
2659 cmp_code = UNKNOWN;
2660 switch (code)
2661 {
2662 case EQ: case LE: case LT: case LEU: case LTU:
2663 case UNORDERED:
2664 /* We have these compares. */
2665 if (cmp_mode == DFmode)
2666 cmp_code = code, code = NE;
2667 break;
2668
2669 case NE:
2670 if (cmp_mode == DImode && op1 == const0_rtx)
2671 break;
2672 /* FALLTHRU */
2673
2674 case ORDERED:
2675 cmp_code = reverse_condition (code);
2676 code = EQ;
2677 break;
2678
2679 case GE: case GT: case GEU: case GTU:
2680 /* These normally need swapping, but for integer zero we have
2681 special patterns that recognize swapped operands. */
2682 if (cmp_mode == DImode && op1 == const0_rtx)
2683 break;
2684 code = swap_condition (code);
2685 if (cmp_mode == DFmode)
2686 cmp_code = code, code = NE;
2687 tmp = op0, op0 = op1, op1 = tmp;
2688 break;
2689
2690 default:
2691 gcc_unreachable ();
2692 }
2693
2694 if (cmp_mode == DImode)
2695 {
2696 if (!register_operand (op0, DImode))
2697 op0 = force_reg (DImode, op0);
2698 if (!reg_or_8bit_operand (op1, DImode))
2699 op1 = force_reg (DImode, op1);
2700 }
2701
2702 /* Emit an initial compare instruction, if necessary. */
2703 if (cmp_code != UNKNOWN)
2704 {
2705 tmp = gen_reg_rtx (cmp_mode);
2706 emit_insn (gen_rtx_SET (VOIDmode, tmp,
2707 gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)));
2708
2709 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2710 op1 = const0_rtx;
2711 }
2712
2713 /* Emit the setcc instruction. */
2714 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2715 gen_rtx_fmt_ee (code, DImode, op0, op1)));
2716 return true;
2717 }
2718
2719
2720 /* Rewrite a comparison against zero CMP of the form
2721 (CODE (cc0) (const_int 0)) so it can be written validly in
2722 a conditional move (if_then_else CMP ...).
2723 If both of the operands that set cc0 are nonzero we must emit
2724 an insn to perform the compare (it can't be done within
2725 the conditional move). */
2726
2727 rtx
2728 alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
2729 {
2730 enum rtx_code code = GET_CODE (cmp);
2731 enum rtx_code cmov_code = NE;
2732 rtx op0 = XEXP (cmp, 0);
2733 rtx op1 = XEXP (cmp, 1);
2734 enum machine_mode cmp_mode
2735 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2736 enum machine_mode cmov_mode = VOIDmode;
2737 int local_fast_math = flag_unsafe_math_optimizations;
2738 rtx tem;
2739
2740 if (cmp_mode == TFmode)
2741 {
2742 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2743 op1 = const0_rtx;
2744 cmp_mode = DImode;
2745 }
2746
2747 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2748
2749 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2750 {
2751 enum rtx_code cmp_code;
2752
2753 if (! TARGET_FIX)
2754 return 0;
2755
2756 /* If we have fp<->int register move instructions, do a cmov by
2757 performing the comparison in fp registers, and move the
2758 zero/nonzero value to integer registers, where we can then
2759 use a normal cmov, or vice-versa. */
2760
2761 switch (code)
2762 {
2763 case EQ: case LE: case LT: case LEU: case LTU:
2764 case UNORDERED:
2765 /* We have these compares. */
2766 cmp_code = code, code = NE;
2767 break;
2768
2769 case NE:
2770 case ORDERED:
2771 /* These must be reversed. */
2772 cmp_code = reverse_condition (code), code = EQ;
2773 break;
2774
2775 case GE: case GT: case GEU: case GTU:
2776 /* These normally need swapping, but for integer zero we have
2777 special patterns that recognize swapped operands. */
2778 if (cmp_mode == DImode && op1 == const0_rtx)
2779 cmp_code = code, code = NE;
2780 else
2781 {
2782 cmp_code = swap_condition (code);
2783 code = NE;
2784 tem = op0, op0 = op1, op1 = tem;
2785 }
2786 break;
2787
2788 default:
2789 gcc_unreachable ();
2790 }
2791
2792 if (cmp_mode == DImode)
2793 {
2794 if (!reg_or_0_operand (op0, DImode))
2795 op0 = force_reg (DImode, op0);
2796 if (!reg_or_8bit_operand (op1, DImode))
2797 op1 = force_reg (DImode, op1);
2798 }
2799
2800 tem = gen_reg_rtx (cmp_mode);
2801 emit_insn (gen_rtx_SET (VOIDmode, tem,
2802 gen_rtx_fmt_ee (cmp_code, cmp_mode,
2803 op0, op1)));
2804
2805 cmp_mode = cmp_mode == DImode ? DFmode : DImode;
2806 op0 = gen_lowpart (cmp_mode, tem);
2807 op1 = CONST0_RTX (cmp_mode);
2808 cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2809 local_fast_math = 1;
2810 }
2811
2812 if (cmp_mode == DImode)
2813 {
2814 if (!reg_or_0_operand (op0, DImode))
2815 op0 = force_reg (DImode, op0);
2816 if (!reg_or_8bit_operand (op1, DImode))
2817 op1 = force_reg (DImode, op1);
2818 }
2819
2820 /* We may be able to use a conditional move directly.
2821 This avoids emitting spurious compares. */
2822 if (signed_comparison_operator (cmp, VOIDmode)
2823 && (cmp_mode == DImode || local_fast_math)
2824 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2825 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2826
2827 /* We can't put the comparison inside the conditional move;
2828 emit a compare instruction and put that inside the
2829 conditional move. Make sure we emit only comparisons we have;
2830 swap or reverse as necessary. */
2831
2832 if (!can_create_pseudo_p ())
2833 return NULL_RTX;
2834
2835 switch (code)
2836 {
2837 case EQ: case LE: case LT: case LEU: case LTU:
2838 case UNORDERED:
2839 /* We have these compares: */
2840 break;
2841
2842 case NE:
2843 case ORDERED:
2844 /* These must be reversed. */
2845 code = reverse_condition (code);
2846 cmov_code = EQ;
2847 break;
2848
2849 case GE: case GT: case GEU: case GTU:
2850 /* These normally need swapping, but for integer zero we have
2851 special patterns that recognize swapped operands. */
2852 if (cmp_mode == DImode && op1 == const0_rtx)
2853 break;
2854 code = swap_condition (code);
2855 tem = op0, op0 = op1, op1 = tem;
2856 break;
2857
2858 default:
2859 gcc_unreachable ();
2860 }
2861
2862 if (cmp_mode == DImode)
2863 {
2864 if (!reg_or_0_operand (op0, DImode))
2865 op0 = force_reg (DImode, op0);
2866 if (!reg_or_8bit_operand (op1, DImode))
2867 op1 = force_reg (DImode, op1);
2868 }
2869
2870 /* ??? We mark the branch mode to be CCmode to prevent the compare
2871 and cmov from being combined, since the compare insn follows IEEE
2872 rules that the cmov does not. */
2873 if (cmp_mode == DFmode && !local_fast_math)
2874 cmov_mode = CCmode;
2875
2876 tem = gen_reg_rtx (cmp_mode);
2877 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2878 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2879 }
2880
2881 /* Simplify a conditional move of two constants into a setcc with
2882 arithmetic. This is done with a splitter since combine would
2883 just undo the work if done during code generation. It also catches
2884 cases we wouldn't have before cse. */
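
/* For example (illustrative of the shapes handled below): (x != 0 ? 8 : 0)
   becomes a setcc on the condition followed by "sll d,3,d";
   (x < y ? -1 : 0) becomes a setcc followed by a negq; and
   (x < y ? 5 : 1) becomes a setcc followed by "s4addq d,1,d".  */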
2885
2886 int
2887 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2888 rtx t_rtx, rtx f_rtx)
2889 {
2890 HOST_WIDE_INT t, f, diff;
2891 enum machine_mode mode;
2892 rtx target, subtarget, tmp;
2893
2894 mode = GET_MODE (dest);
2895 t = INTVAL (t_rtx);
2896 f = INTVAL (f_rtx);
2897 diff = t - f;
2898
2899 if (((code == NE || code == EQ) && diff < 0)
2900 || (code == GE || code == GT))
2901 {
2902 code = reverse_condition (code);
2903 diff = t, t = f, f = diff;
2904 diff = t - f;
2905 }
2906
2907 subtarget = target = dest;
2908 if (mode != DImode)
2909 {
2910 target = gen_lowpart (DImode, dest);
2911 if (can_create_pseudo_p ())
2912 subtarget = gen_reg_rtx (DImode);
2913 else
2914 subtarget = target;
2915 }
2916 /* Below, we must be careful to use copy_rtx on target and subtarget
2917 in intermediate insns, as they may be a subreg rtx, which may not
2918 be shared. */
2919
2920 if (f == 0 && exact_log2 (diff) > 0
2921 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2922 viable over a longer latency cmove. On EV5, the E0 slot is a
2923 scarce resource, and on EV4 shift has the same latency as a cmove. */
2924 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2925 {
2926 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2927 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2928
2929 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2930 GEN_INT (exact_log2 (t)));
2931 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2932 }
2933 else if (f == 0 && t == -1)
2934 {
2935 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2936 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2937
2938 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2939 }
2940 else if (diff == 1 || diff == 4 || diff == 8)
2941 {
2942 rtx add_op;
2943
2944 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2945 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2946
2947 if (diff == 1)
2948 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2949 else
2950 {
2951 add_op = GEN_INT (f);
2952 if (sext_add_operand (add_op, mode))
2953 {
2954 tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2955 GEN_INT (diff));
2956 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2957 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2958 }
2959 else
2960 return 0;
2961 }
2962 }
2963 else
2964 return 0;
2965
2966 return 1;
2967 }
2968 \f
2969 /* Look up the function X_floating library function name for the
2970 given operation. */
2971
2972 struct GTY(()) xfloating_op
2973 {
2974 const enum rtx_code code;
2975 const char *const GTY((skip)) osf_func;
2976 const char *const GTY((skip)) vms_func;
2977 rtx libcall;
2978 };
2979
2980 static GTY(()) struct xfloating_op xfloating_ops[] =
2981 {
2982 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2983 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2984 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2985 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2986 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2987 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2988 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2989 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2990 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2991 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2992 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2993 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2994 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2995 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2996 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2997 };
2998
2999 static GTY(()) struct xfloating_op vax_cvt_ops[] =
3000 {
3001 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
3002 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
3003 };
3004
3005 static rtx
3006 alpha_lookup_xfloating_lib_func (enum rtx_code code)
3007 {
3008 struct xfloating_op *ops = xfloating_ops;
3009 long n = ARRAY_SIZE (xfloating_ops);
3010 long i;
3011
3012 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
3013
3014 /* How irritating. Nothing to key off for the main table. */
3015 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
3016 {
3017 ops = vax_cvt_ops;
3018 n = ARRAY_SIZE (vax_cvt_ops);
3019 }
3020
3021 for (i = 0; i < n; ++i, ++ops)
3022 if (ops->code == code)
3023 {
3024 rtx func = ops->libcall;
3025 if (!func)
3026 {
3027 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
3028 ? ops->vms_func : ops->osf_func);
3029 ops->libcall = func;
3030 }
3031 return func;
3032 }
3033
3034 gcc_unreachable ();
3035 }
3036
3037 /* Most X_floating operations take the rounding mode as an argument.
3038 Compute that here. */
3039
3040 static int
3041 alpha_compute_xfloating_mode_arg (enum rtx_code code,
3042 enum alpha_fp_rounding_mode round)
3043 {
3044 int mode;
3045
3046 switch (round)
3047 {
3048 case ALPHA_FPRM_NORM:
3049 mode = 2;
3050 break;
3051 case ALPHA_FPRM_MINF:
3052 mode = 1;
3053 break;
3054 case ALPHA_FPRM_CHOP:
3055 mode = 0;
3056 break;
3057 case ALPHA_FPRM_DYN:
3058 mode = 4;
3059 break;
3060 default:
3061 gcc_unreachable ();
3062
3063 /* XXX For reference, round to +inf is mode = 3. */
3064 }
3065
3066 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
3067 mode |= 0x10000;
3068
3069 return mode;
3070 }
3071
3072 /* Emit an X_floating library function call.
3073
3074 Note that these functions do not follow normal calling conventions:
3075 TFmode arguments are passed in two integer registers (as opposed to
3076 indirect); TFmode return values appear in R16+R17.
3077
3078 FUNC is the function to call.
3079 TARGET is where the output belongs.
3080 OPERANDS are the inputs.
3081 NOPERANDS is the count of inputs.
3082 EQUIV is the expression equivalent for the function.
3083 */
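
/* For example, a call generated for a TFmode addition uses
   _OtsAddX / OTS$ADD_X with the first operand in $16/$17, the second
   operand in $18/$19, the rounding-mode argument in $20, and the
   TFmode result coming back in $16/$17.  */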
3084
3085 static void
3086 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
3087 int noperands, rtx equiv)
3088 {
3089 rtx usage = NULL_RTX, tmp, reg;
3090 int regno = 16, i;
3091
3092 start_sequence ();
3093
3094 for (i = 0; i < noperands; ++i)
3095 {
3096 switch (GET_MODE (operands[i]))
3097 {
3098 case TFmode:
3099 reg = gen_rtx_REG (TFmode, regno);
3100 regno += 2;
3101 break;
3102
3103 case DFmode:
3104 reg = gen_rtx_REG (DFmode, regno + 32);
3105 regno += 1;
3106 break;
3107
3108 case VOIDmode:
3109 gcc_assert (CONST_INT_P (operands[i]));
3110 /* FALLTHRU */
3111 case DImode:
3112 reg = gen_rtx_REG (DImode, regno);
3113 regno += 1;
3114 break;
3115
3116 default:
3117 gcc_unreachable ();
3118 }
3119
3120 emit_move_insn (reg, operands[i]);
3121 use_reg (&usage, reg);
3122 }
3123
3124 switch (GET_MODE (target))
3125 {
3126 case TFmode:
3127 reg = gen_rtx_REG (TFmode, 16);
3128 break;
3129 case DFmode:
3130 reg = gen_rtx_REG (DFmode, 32);
3131 break;
3132 case DImode:
3133 reg = gen_rtx_REG (DImode, 0);
3134 break;
3135 default:
3136 gcc_unreachable ();
3137 }
3138
3139 tmp = gen_rtx_MEM (QImode, func);
3140 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
3141 const0_rtx, const0_rtx));
3142 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3143 RTL_CONST_CALL_P (tmp) = 1;
3144
3145 tmp = get_insns ();
3146 end_sequence ();
3147
3148 emit_libcall_block (tmp, target, reg, equiv);
3149 }
3150
3151 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
3152
3153 void
3154 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3155 {
3156 rtx func;
3157 int mode;
3158 rtx out_operands[3];
3159
3160 func = alpha_lookup_xfloating_lib_func (code);
3161 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3162
3163 out_operands[0] = operands[1];
3164 out_operands[1] = operands[2];
3165 out_operands[2] = GEN_INT (mode);
3166 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3167 gen_rtx_fmt_ee (code, TFmode, operands[1],
3168 operands[2]));
3169 }
3170
3171 /* Emit an X_floating library function call for a comparison. */
3172
3173 static rtx
3174 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3175 {
3176 enum rtx_code cmp_code, res_code;
3177 rtx func, out, operands[2], note;
3178
3179 /* X_floating library comparison functions return
3180 -1 unordered
3181 0 false
3182 1 true
3183 Convert the compare against the raw return value. */
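  /* For example (illustrative): LE maps directly to the LE library routine
     and the caller then tests the DImode result with GT against zero, while
     UNORDERED calls the EQ routine and tests for a negative (-1) result.  */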
3184
3185 cmp_code = *pcode;
3186 switch (cmp_code)
3187 {
3188 case UNORDERED:
3189 cmp_code = EQ;
3190 res_code = LT;
3191 break;
3192 case ORDERED:
3193 cmp_code = EQ;
3194 res_code = GE;
3195 break;
3196 case NE:
3197 res_code = NE;
3198 break;
3199 case EQ:
3200 case LT:
3201 case GT:
3202 case LE:
3203 case GE:
3204 res_code = GT;
3205 break;
3206 default:
3207 gcc_unreachable ();
3208 }
3209 *pcode = res_code;
3210
3211 func = alpha_lookup_xfloating_lib_func (cmp_code);
3212
3213 operands[0] = op0;
3214 operands[1] = op1;
3215 out = gen_reg_rtx (DImode);
3216
3217 /* What's actually returned is -1,0,1, not a proper boolean value. */
3218 note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
3219 note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
3220 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3221
3222 return out;
3223 }
3224
3225 /* Emit an X_floating library function call for a conversion. */
3226
3227 void
3228 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3229 {
3230 int noperands = 1, mode;
3231 rtx out_operands[2];
3232 rtx func;
3233 enum rtx_code code = orig_code;
3234
3235 if (code == UNSIGNED_FIX)
3236 code = FIX;
3237
3238 func = alpha_lookup_xfloating_lib_func (code);
3239
3240 out_operands[0] = operands[1];
3241
3242 switch (code)
3243 {
3244 case FIX:
3245 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3246 out_operands[1] = GEN_INT (mode);
3247 noperands = 2;
3248 break;
3249 case FLOAT_TRUNCATE:
3250 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3251 out_operands[1] = GEN_INT (mode);
3252 noperands = 2;
3253 break;
3254 default:
3255 break;
3256 }
3257
3258 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3259 gen_rtx_fmt_e (orig_code,
3260 GET_MODE (operands[0]),
3261 operands[1]));
3262 }
3263
3264 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3265 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3266 guarantee that the sequence
3267 set (OP[0] OP[2])
3268 set (OP[1] OP[3])
3269 is valid. Naturally, output operand ordering is little-endian.
3270 This is used by *movtf_internal and *movti_internal. */
3271
3272 void
3273 alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode,
3274 bool fixup_overlap)
3275 {
3276 switch (GET_CODE (operands[1]))
3277 {
3278 case REG:
3279 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3280 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3281 break;
3282
3283 case MEM:
3284 operands[3] = adjust_address (operands[1], DImode, 8);
3285 operands[2] = adjust_address (operands[1], DImode, 0);
3286 break;
3287
3288 case CONST_INT:
3289 case CONST_DOUBLE:
3290 gcc_assert (operands[1] == CONST0_RTX (mode));
3291 operands[2] = operands[3] = const0_rtx;
3292 break;
3293
3294 default:
3295 gcc_unreachable ();
3296 }
3297
3298 switch (GET_CODE (operands[0]))
3299 {
3300 case REG:
3301 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3302 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3303 break;
3304
3305 case MEM:
3306 operands[1] = adjust_address (operands[0], DImode, 8);
3307 operands[0] = adjust_address (operands[0], DImode, 0);
3308 break;
3309
3310 default:
3311 gcc_unreachable ();
3312 }
3313
3314 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3315 {
3316 rtx tmp;
3317 tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
3318 tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
3319 }
3320 }
3321
3322 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3323 op2 is a register containing the sign bit, operation is the
3324 logical operation to be performed. */
3325
3326 void
3327 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3328 {
3329 rtx high_bit = operands[2];
3330 rtx scratch;
3331 int move;
3332
3333 alpha_split_tmode_pair (operands, TFmode, false);
3334
3335 /* Detect three flavors of operand overlap. */
3336 move = 1;
3337 if (rtx_equal_p (operands[0], operands[2]))
3338 move = 0;
3339 else if (rtx_equal_p (operands[1], operands[2]))
3340 {
3341 if (rtx_equal_p (operands[0], high_bit))
3342 move = 2;
3343 else
3344 move = -1;
3345 }
3346
3347 if (move < 0)
3348 emit_move_insn (operands[0], operands[2]);
3349
3350 /* ??? If the destination overlaps both source tf and high_bit, then
3351 assume source tf is dead in its entirety and use the other half
3352 for a scratch register. Otherwise "scratch" is just the proper
3353 destination register. */
3354 scratch = operands[move < 2 ? 1 : 3];
3355
3356 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3357
3358 if (move > 0)
3359 {
3360 emit_move_insn (operands[0], operands[2]);
3361 if (move > 1)
3362 emit_move_insn (operands[1], scratch);
3363 }
3364 }
3365 \f
3366 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3367 unaligned data:
3368
3369 unsigned: signed:
3370 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3371 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3372 lda r3,X(r11) lda r3,X+2(r11)
3373 extwl r1,r3,r1 extql r1,r3,r1
3374 extwh r2,r3,r2 extqh r2,r3,r2
3375	     or r1,r2,r1		or r1,r2,r1
3376 sra r1,48,r1
3377
3378 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3379 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3380 lda r3,X(r11) lda r3,X(r11)
3381 extll r1,r3,r1 extll r1,r3,r1
3382 extlh r2,r3,r2 extlh r2,r3,r2
3383	     or r1,r2,r1		addl r1,r2,r1
3384
3385 quad: ldq_u r1,X(r11)
3386 ldq_u r2,X+7(r11)
3387 lda r3,X(r11)
3388 extql r1,r3,r1
3389 extqh r2,r3,r2
3390	     or r1,r2,r1
3391 */
3392
3393 void
3394 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3395 HOST_WIDE_INT ofs, int sign)
3396 {
3397 rtx meml, memh, addr, extl, exth, tmp, mema;
3398 enum machine_mode mode;
3399
3400 if (TARGET_BWX && size == 2)
3401 {
3402 meml = adjust_address (mem, QImode, ofs);
3403 memh = adjust_address (mem, QImode, ofs+1);
3404 extl = gen_reg_rtx (DImode);
3405 exth = gen_reg_rtx (DImode);
3406 emit_insn (gen_zero_extendqidi2 (extl, meml));
3407 emit_insn (gen_zero_extendqidi2 (exth, memh));
3408 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3409 NULL, 1, OPTAB_LIB_WIDEN);
3410 addr = expand_simple_binop (DImode, IOR, extl, exth,
3411 NULL, 1, OPTAB_LIB_WIDEN);
3412
3413 if (sign && GET_MODE (tgt) != HImode)
3414 {
3415 addr = gen_lowpart (HImode, addr);
3416 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3417 }
3418 else
3419 {
3420 if (GET_MODE (tgt) != DImode)
3421 addr = gen_lowpart (GET_MODE (tgt), addr);
3422 emit_move_insn (tgt, addr);
3423 }
3424 return;
3425 }
3426
3427 meml = gen_reg_rtx (DImode);
3428 memh = gen_reg_rtx (DImode);
3429 addr = gen_reg_rtx (DImode);
3430 extl = gen_reg_rtx (DImode);
3431 exth = gen_reg_rtx (DImode);
3432
3433 mema = XEXP (mem, 0);
3434 if (GET_CODE (mema) == LO_SUM)
3435 mema = force_reg (Pmode, mema);
3436
3437 /* AND addresses cannot be in any alias set, since they may implicitly
3438 alias surrounding code. Ideally we'd have some alias set that
3439 covered all types except those with alignment 8 or higher. */
3440
3441 tmp = change_address (mem, DImode,
3442 gen_rtx_AND (DImode,
3443 plus_constant (DImode, mema, ofs),
3444 GEN_INT (-8)));
3445 set_mem_alias_set (tmp, 0);
3446 emit_move_insn (meml, tmp);
3447
3448 tmp = change_address (mem, DImode,
3449 gen_rtx_AND (DImode,
3450 plus_constant (DImode, mema,
3451 ofs + size - 1),
3452 GEN_INT (-8)));
3453 set_mem_alias_set (tmp, 0);
3454 emit_move_insn (memh, tmp);
3455
3456 if (sign && size == 2)
3457 {
3458 emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3459
3460 emit_insn (gen_extql (extl, meml, addr));
3461 emit_insn (gen_extqh (exth, memh, addr));
3462
3463 /* We must use tgt here for the target. Alpha-vms port fails if we use
3464 addr for the target, because addr is marked as a pointer and combine
3465 knows that pointers are always sign-extended 32-bit values. */
3466 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3467 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3468 addr, 1, OPTAB_WIDEN);
3469 }
3470 else
3471 {
3472 emit_move_insn (addr, plus_constant (Pmode, mema, ofs));
3473 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3474 switch ((int) size)
3475 {
3476 case 2:
3477 emit_insn (gen_extwh (exth, memh, addr));
3478 mode = HImode;
3479 break;
3480 case 4:
3481 emit_insn (gen_extlh (exth, memh, addr));
3482 mode = SImode;
3483 break;
3484 case 8:
3485 emit_insn (gen_extqh (exth, memh, addr));
3486 mode = DImode;
3487 break;
3488 default:
3489 gcc_unreachable ();
3490 }
3491
3492 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3493 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3494 sign, OPTAB_WIDEN);
3495 }
3496
3497 if (addr != tgt)
3498 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3499 }
3500
3501 /* Similarly, use ins and msk instructions to perform unaligned stores. */
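
/* Roughly, for a quadword store of r4 to the unaligned address X(r11), the
   code below produces an Architecture Handbook style sequence along the
   lines of (shown only as an illustration; sizes 2 and 4 use the insw/insl
   and mskw/mskl variants):

	ldq_u r1,X+7(r11)
	ldq_u r2,X(r11)
	lda r3,X(r11)
	insqh r4,r3,r5
	insql r4,r3,r6
	mskqh r1,r3,r1
	mskql r2,r3,r2
	or r1,r5,r1
	or r2,r6,r2
	stq_u r1,X+7(r11)
	stq_u r2,X(r11)  */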
3502
3503 void
3504 alpha_expand_unaligned_store (rtx dst, rtx src,
3505 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3506 {
3507 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3508
3509 if (TARGET_BWX && size == 2)
3510 {
3511 if (src != const0_rtx)
3512 {
3513 dstl = gen_lowpart (QImode, src);
3514 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3515 NULL, 1, OPTAB_LIB_WIDEN);
3516 dsth = gen_lowpart (QImode, dsth);
3517 }
3518 else
3519 dstl = dsth = const0_rtx;
3520
3521 meml = adjust_address (dst, QImode, ofs);
3522 memh = adjust_address (dst, QImode, ofs+1);
3523
3524 emit_move_insn (meml, dstl);
3525 emit_move_insn (memh, dsth);
3526 return;
3527 }
3528
3529 dstl = gen_reg_rtx (DImode);
3530 dsth = gen_reg_rtx (DImode);
3531 insl = gen_reg_rtx (DImode);
3532 insh = gen_reg_rtx (DImode);
3533
3534 dsta = XEXP (dst, 0);
3535 if (GET_CODE (dsta) == LO_SUM)
3536 dsta = force_reg (Pmode, dsta);
3537
3538 /* AND addresses cannot be in any alias set, since they may implicitly
3539 alias surrounding code. Ideally we'd have some alias set that
3540 covered all types except those with alignment 8 or higher. */
3541
3542 meml = change_address (dst, DImode,
3543 gen_rtx_AND (DImode,
3544 plus_constant (DImode, dsta, ofs),
3545 GEN_INT (-8)));
3546 set_mem_alias_set (meml, 0);
3547
3548 memh = change_address (dst, DImode,
3549 gen_rtx_AND (DImode,
3550 plus_constant (DImode, dsta,
3551 ofs + size - 1),
3552 GEN_INT (-8)));
3553 set_mem_alias_set (memh, 0);
3554
3555 emit_move_insn (dsth, memh);
3556 emit_move_insn (dstl, meml);
3557
3558 addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
3559
3560 if (src != CONST0_RTX (GET_MODE (src)))
3561 {
3562 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3563 GEN_INT (size*8), addr));
3564
3565 switch ((int) size)
3566 {
3567 case 2:
3568 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3569 break;
3570 case 4:
3571 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3572 break;
3573 case 8:
3574 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3575 break;
3576 default:
3577 gcc_unreachable ();
3578 }
3579 }
3580
3581 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3582
3583 switch ((int) size)
3584 {
3585 case 2:
3586 emit_insn (gen_mskwl (dstl, dstl, addr));
3587 break;
3588 case 4:
3589 emit_insn (gen_mskll (dstl, dstl, addr));
3590 break;
3591 case 8:
3592 emit_insn (gen_mskql (dstl, dstl, addr));
3593 break;
3594 default:
3595 gcc_unreachable ();
3596 }
3597
3598 if (src != CONST0_RTX (GET_MODE (src)))
3599 {
3600 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3601 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3602 }
3603
3604 /* Must store high before low for degenerate case of aligned. */
3605 emit_move_insn (memh, dsth);
3606 emit_move_insn (meml, dstl);
3607 }
3608
3609 /* The block move code tries to maximize speed by separating loads and
3610 stores at the expense of register pressure: we load all of the data
3611    before we store it back out.  Two secondary effects are worth
3612    mentioning: this speeds copying both to/from aligned and unaligned
3613    buffers, and it makes the code significantly easier to write.  */
3614
3615 #define MAX_MOVE_WORDS 8
3616
3617 /* Load an integral number of consecutive unaligned quadwords. */
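
/* Note that WORDS unaligned quadwords span up to WORDS+1 aligned quadwords,
   which is why one extra data register is allocated below.  */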
3618
3619 static void
3620 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3621 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3622 {
3623 rtx const im8 = GEN_INT (-8);
3624 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3625 rtx sreg, areg, tmp, smema;
3626 HOST_WIDE_INT i;
3627
3628 smema = XEXP (smem, 0);
3629 if (GET_CODE (smema) == LO_SUM)
3630 smema = force_reg (Pmode, smema);
3631
3632 /* Generate all the tmp registers we need. */
3633 for (i = 0; i < words; ++i)
3634 {
3635 data_regs[i] = out_regs[i];
3636 ext_tmps[i] = gen_reg_rtx (DImode);
3637 }
3638 data_regs[words] = gen_reg_rtx (DImode);
3639
3640 if (ofs != 0)
3641 smem = adjust_address (smem, GET_MODE (smem), ofs);
3642
3643 /* Load up all of the source data. */
3644 for (i = 0; i < words; ++i)
3645 {
3646 tmp = change_address (smem, DImode,
3647 gen_rtx_AND (DImode,
3648 plus_constant (DImode, smema, 8*i),
3649 im8));
3650 set_mem_alias_set (tmp, 0);
3651 emit_move_insn (data_regs[i], tmp);
3652 }
3653
3654 tmp = change_address (smem, DImode,
3655 gen_rtx_AND (DImode,
3656 plus_constant (DImode, smema,
3657 8*words - 1),
3658 im8));
3659 set_mem_alias_set (tmp, 0);
3660 emit_move_insn (data_regs[words], tmp);
3661
3662 /* Extract the half-word fragments. Unfortunately DEC decided to make
3663 extxh with offset zero a noop instead of zeroing the register, so
3664 we must take care of that edge condition ourselves with cmov. */
3665
3666 sreg = copy_addr_to_reg (smema);
3667 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3668 1, OPTAB_WIDEN);
3669 for (i = 0; i < words; ++i)
3670 {
3671 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3672 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3673 emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i],
3674 gen_rtx_IF_THEN_ELSE (DImode,
3675 gen_rtx_EQ (DImode, areg,
3676 const0_rtx),
3677 const0_rtx, ext_tmps[i])));
3678 }
3679
3680 /* Merge the half-words into whole words. */
3681 for (i = 0; i < words; ++i)
3682 {
3683 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3684 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3685 }
3686 }
3687
3688 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
3689 may be NULL to store zeros. */
3690
3691 static void
3692 alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3693 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3694 {
3695 rtx const im8 = GEN_INT (-8);
3696 rtx ins_tmps[MAX_MOVE_WORDS];
3697 rtx st_tmp_1, st_tmp_2, dreg;
3698 rtx st_addr_1, st_addr_2, dmema;
3699 HOST_WIDE_INT i;
3700
3701 dmema = XEXP (dmem, 0);
3702 if (GET_CODE (dmema) == LO_SUM)
3703 dmema = force_reg (Pmode, dmema);
3704
3705 /* Generate all the tmp registers we need. */
3706 if (data_regs != NULL)
3707 for (i = 0; i < words; ++i)
3708 ins_tmps[i] = gen_reg_rtx(DImode);
3709 st_tmp_1 = gen_reg_rtx(DImode);
3710 st_tmp_2 = gen_reg_rtx(DImode);
3711
3712 if (ofs != 0)
3713 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3714
3715 st_addr_2 = change_address (dmem, DImode,
3716 gen_rtx_AND (DImode,
3717 plus_constant (DImode, dmema,
3718 words*8 - 1),
3719 im8));
3720 set_mem_alias_set (st_addr_2, 0);
3721
3722 st_addr_1 = change_address (dmem, DImode,
3723 gen_rtx_AND (DImode, dmema, im8));
3724 set_mem_alias_set (st_addr_1, 0);
3725
3726 /* Load up the destination end bits. */
3727 emit_move_insn (st_tmp_2, st_addr_2);
3728 emit_move_insn (st_tmp_1, st_addr_1);
3729
3730 /* Shift the input data into place. */
3731 dreg = copy_addr_to_reg (dmema);
3732 if (data_regs != NULL)
3733 {
3734 for (i = words-1; i >= 0; --i)
3735 {
3736 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3737 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3738 }
3739 for (i = words-1; i > 0; --i)
3740 {
3741 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3742 ins_tmps[i-1], ins_tmps[i-1], 1,
3743 OPTAB_WIDEN);
3744 }
3745 }
3746
3747 /* Split and merge the ends with the destination data. */
3748 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3749 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3750
3751 if (data_regs != NULL)
3752 {
3753 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3754 st_tmp_2, 1, OPTAB_WIDEN);
3755 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3756 st_tmp_1, 1, OPTAB_WIDEN);
3757 }
3758
3759 /* Store it all. */
3760 emit_move_insn (st_addr_2, st_tmp_2);
3761 for (i = words-1; i > 0; --i)
3762 {
3763 rtx tmp = change_address (dmem, DImode,
3764 gen_rtx_AND (DImode,
3765 plus_constant (DImode,
3766 dmema, i*8),
3767 im8));
3768 set_mem_alias_set (tmp, 0);
3769 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3770 }
3771 emit_move_insn (st_addr_1, st_tmp_1);
3772 }
3773
3774
3775 /* Expand string/block move operations.
3776
3777 operands[0] is the pointer to the destination.
3778 operands[1] is the pointer to the source.
3779 operands[2] is the number of bytes to move.
3780 operands[3] is the alignment. */
3781
3782 int
3783 alpha_expand_block_move (rtx operands[])
3784 {
3785 rtx bytes_rtx = operands[2];
3786 rtx align_rtx = operands[3];
3787 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3788 HOST_WIDE_INT bytes = orig_bytes;
3789 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3790 HOST_WIDE_INT dst_align = src_align;
3791 rtx orig_src = operands[1];
3792 rtx orig_dst = operands[0];
3793 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3794 rtx tmp;
3795 unsigned int i, words, ofs, nregs = 0;
3796
3797 if (orig_bytes <= 0)
3798 return 1;
3799 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3800 return 0;
3801
3802 /* Look for additional alignment information from recorded register info. */
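  /* For instance, a source address of the form (plus (reg) (const_int 16)),
     where the register is known to be 64-bit aligned, lets us treat the
     source as 64-bit aligned as well, since the offset is a multiple of 8.  */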
3803
3804 tmp = XEXP (orig_src, 0);
3805 if (REG_P (tmp))
3806 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3807 else if (GET_CODE (tmp) == PLUS
3808 && REG_P (XEXP (tmp, 0))
3809 && CONST_INT_P (XEXP (tmp, 1)))
3810 {
3811 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3812 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3813
3814 if (a > src_align)
3815 {
3816 if (a >= 64 && c % 8 == 0)
3817 src_align = 64;
3818 else if (a >= 32 && c % 4 == 0)
3819 src_align = 32;
3820 else if (a >= 16 && c % 2 == 0)
3821 src_align = 16;
3822 }
3823 }
3824
3825 tmp = XEXP (orig_dst, 0);
3826 if (REG_P (tmp))
3827 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3828 else if (GET_CODE (tmp) == PLUS
3829 && REG_P (XEXP (tmp, 0))
3830 && CONST_INT_P (XEXP (tmp, 1)))
3831 {
3832 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3833 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3834
3835 if (a > dst_align)
3836 {
3837 if (a >= 64 && c % 8 == 0)
3838 dst_align = 64;
3839 else if (a >= 32 && c % 4 == 0)
3840 dst_align = 32;
3841 else if (a >= 16 && c % 2 == 0)
3842 dst_align = 16;
3843 }
3844 }
3845
3846 ofs = 0;
3847 if (src_align >= 64 && bytes >= 8)
3848 {
3849 words = bytes / 8;
3850
3851 for (i = 0; i < words; ++i)
3852 data_regs[nregs + i] = gen_reg_rtx (DImode);
3853
3854 for (i = 0; i < words; ++i)
3855 emit_move_insn (data_regs[nregs + i],
3856 adjust_address (orig_src, DImode, ofs + i * 8));
3857
3858 nregs += words;
3859 bytes -= words * 8;
3860 ofs += words * 8;
3861 }
3862
3863 if (src_align >= 32 && bytes >= 4)
3864 {
3865 words = bytes / 4;
3866
3867 for (i = 0; i < words; ++i)
3868 data_regs[nregs + i] = gen_reg_rtx (SImode);
3869
3870 for (i = 0; i < words; ++i)
3871 emit_move_insn (data_regs[nregs + i],
3872 adjust_address (orig_src, SImode, ofs + i * 4));
3873
3874 nregs += words;
3875 bytes -= words * 4;
3876 ofs += words * 4;
3877 }
3878
3879 if (bytes >= 8)
3880 {
3881 words = bytes / 8;
3882
3883 for (i = 0; i < words+1; ++i)
3884 data_regs[nregs + i] = gen_reg_rtx (DImode);
3885
3886 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3887 words, ofs);
3888
3889 nregs += words;
3890 bytes -= words * 8;
3891 ofs += words * 8;
3892 }
3893
3894 if (! TARGET_BWX && bytes >= 4)
3895 {
3896 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3897 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3898 bytes -= 4;
3899 ofs += 4;
3900 }
3901
3902 if (bytes >= 2)
3903 {
3904 if (src_align >= 16)
3905 {
3906 do {
3907 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3908 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3909 bytes -= 2;
3910 ofs += 2;
3911 } while (bytes >= 2);
3912 }
3913 else if (! TARGET_BWX)
3914 {
3915 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3916 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3917 bytes -= 2;
3918 ofs += 2;
3919 }
3920 }
3921
3922 while (bytes > 0)
3923 {
3924 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3925 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3926 bytes -= 1;
3927 ofs += 1;
3928 }
3929
3930 gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3931
3932 /* Now save it back out again. */
3933
3934 i = 0, ofs = 0;
3935
3936 /* Write out the data in whatever chunks reading the source allowed. */
3937 if (dst_align >= 64)
3938 {
3939 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3940 {
3941 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3942 data_regs[i]);
3943 ofs += 8;
3944 i++;
3945 }
3946 }
3947
3948 if (dst_align >= 32)
3949 {
3950 /* If the source has remaining DImode regs, write them out in
3951 two pieces. */
3952 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3953 {
3954 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3955 NULL_RTX, 1, OPTAB_WIDEN);
3956
3957 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3958 gen_lowpart (SImode, data_regs[i]));
3959 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3960 gen_lowpart (SImode, tmp));
3961 ofs += 8;
3962 i++;
3963 }
3964
3965 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3966 {
3967 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3968 data_regs[i]);
3969 ofs += 4;
3970 i++;
3971 }
3972 }
3973
3974 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3975 {
3976 /* Write out a remaining block of words using unaligned methods. */
3977
3978 for (words = 1; i + words < nregs; words++)
3979 if (GET_MODE (data_regs[i + words]) != DImode)
3980 break;
3981
3982 if (words == 1)
3983 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3984 else
3985 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3986 words, ofs);
3987
3988 i += words;
3989 ofs += words * 8;
3990 }
3991
3992 /* Due to the above, this won't be aligned. */
3993 /* ??? If we have more than one of these, consider constructing full
3994 words in registers and using alpha_expand_unaligned_store_words. */
3995 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3996 {
3997 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3998 ofs += 4;
3999 i++;
4000 }
4001
4002 if (dst_align >= 16)
4003 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4004 {
4005 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
4006 i++;
4007 ofs += 2;
4008 }
4009 else
4010 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4011 {
4012 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
4013 i++;
4014 ofs += 2;
4015 }
4016
4017 /* The remainder must be byte copies. */
4018 while (i < nregs)
4019 {
4020 gcc_assert (GET_MODE (data_regs[i]) == QImode);
4021 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
4022 i++;
4023 ofs += 1;
4024 }
4025
4026 return 1;
4027 }
4028
4029 int
4030 alpha_expand_block_clear (rtx operands[])
4031 {
4032 rtx bytes_rtx = operands[1];
4033 rtx align_rtx = operands[3];
4034 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
4035 HOST_WIDE_INT bytes = orig_bytes;
4036 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
4037 HOST_WIDE_INT alignofs = 0;
4038 rtx orig_dst = operands[0];
4039 rtx tmp;
4040 int i, words, ofs = 0;
4041
4042 if (orig_bytes <= 0)
4043 return 1;
4044 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
4045 return 0;
4046
4047 /* Look for stricter alignment. */
4048 tmp = XEXP (orig_dst, 0);
4049 if (REG_P (tmp))
4050 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
4051 else if (GET_CODE (tmp) == PLUS
4052 && REG_P (XEXP (tmp, 0))
4053 && CONST_INT_P (XEXP (tmp, 1)))
4054 {
4055 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
4056 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
4057
4058 if (a > align)
4059 {
4060 if (a >= 64)
4061 align = a, alignofs = 8 - c % 8;
4062 else if (a >= 32)
4063 align = a, alignofs = 4 - c % 4;
4064 else if (a >= 16)
4065 align = a, alignofs = 2 - c % 2;
4066 }
4067 }
4068
4069 /* Handle an unaligned prefix first. */
4070
4071 if (alignofs > 0)
4072 {
4073 #if HOST_BITS_PER_WIDE_INT >= 64
4074 /* Given that alignofs is bounded by align, the only time BWX could
4075 generate three stores is for a 7 byte fill. Prefer two individual
4076 stores over a load/mask/store sequence. */
4077 if ((!TARGET_BWX || alignofs == 7)
4078 && align >= 32
4079 && !(alignofs == 4 && bytes >= 4))
4080 {
4081 enum machine_mode mode = (align >= 64 ? DImode : SImode);
4082 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
4083 rtx mem, tmp;
4084 HOST_WIDE_INT mask;
4085
4086 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
4087 set_mem_alias_set (mem, 0);
4088
4089 mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
4090 if (bytes < alignofs)
4091 {
4092 mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
4093 ofs += bytes;
4094 bytes = 0;
4095 }
4096 else
4097 {
4098 bytes -= alignofs;
4099 ofs += alignofs;
4100 }
4101 alignofs = 0;
4102
4103 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4104 NULL_RTX, 1, OPTAB_WIDEN);
4105
4106 emit_move_insn (mem, tmp);
4107 }
4108 #endif
4109
4110 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
4111 {
4112 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4113 bytes -= 1;
4114 ofs += 1;
4115 alignofs -= 1;
4116 }
4117 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
4118 {
4119 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
4120 bytes -= 2;
4121 ofs += 2;
4122 alignofs -= 2;
4123 }
4124 if (alignofs == 4 && bytes >= 4)
4125 {
4126 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4127 bytes -= 4;
4128 ofs += 4;
4129 alignofs = 0;
4130 }
4131
4132 /* If we've not used the extra lead alignment information by now,
4133 we won't be able to. Downgrade align to match what's left over. */
4134 if (alignofs > 0)
4135 {
4136 alignofs = alignofs & -alignofs;
4137 align = MIN (align, alignofs * BITS_PER_UNIT);
4138 }
4139 }
4140
4141 /* Handle a block of contiguous long-words. */
4142
4143 if (align >= 64 && bytes >= 8)
4144 {
4145 words = bytes / 8;
4146
4147 for (i = 0; i < words; ++i)
4148 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4149 const0_rtx);
4150
4151 bytes -= words * 8;
4152 ofs += words * 8;
4153 }
4154
4155 /* If the block is large and appropriately aligned, emit a single
4156 store followed by a sequence of stq_u insns. */
4157
4158 if (align >= 32 && bytes > 16)
4159 {
4160 rtx orig_dsta;
4161
4162 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4163 bytes -= 4;
4164 ofs += 4;
4165
4166 orig_dsta = XEXP (orig_dst, 0);
4167 if (GET_CODE (orig_dsta) == LO_SUM)
4168 orig_dsta = force_reg (Pmode, orig_dsta);
4169
4170 words = bytes / 8;
4171 for (i = 0; i < words; ++i)
4172 {
4173 rtx mem
4174 = change_address (orig_dst, DImode,
4175 gen_rtx_AND (DImode,
4176 plus_constant (DImode, orig_dsta,
4177 ofs + i*8),
4178 GEN_INT (-8)));
4179 set_mem_alias_set (mem, 0);
4180 emit_move_insn (mem, const0_rtx);
4181 }
4182
4183 /* Depending on the alignment, the first stq_u may have overlapped
4184 with the initial stl, which means that the last stq_u didn't
4185 write as much as it would appear. Leave those questionable bytes
4186 unaccounted for. */
4187 bytes -= words * 8 - 4;
4188 ofs += words * 8 - 4;
4189 }
4190
4191 /* Handle a smaller block of aligned words. */
4192
4193 if ((align >= 64 && bytes == 4)
4194 || (align == 32 && bytes >= 4))
4195 {
4196 words = bytes / 4;
4197
4198 for (i = 0; i < words; ++i)
4199 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4200 const0_rtx);
4201
4202 bytes -= words * 4;
4203 ofs += words * 4;
4204 }
4205
4206 /* An unaligned block uses stq_u stores for as many as possible. */
4207
4208 if (bytes >= 8)
4209 {
4210 words = bytes / 8;
4211
4212 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4213
4214 bytes -= words * 8;
4215 ofs += words * 8;
4216 }
4217
4218 /* Next clean up any trailing pieces. */
4219
4220 #if HOST_BITS_PER_WIDE_INT >= 64
4221 /* Count the number of bits in BYTES for which aligned stores could
4222 be emitted. */
4223 words = 0;
4224 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4225 if (bytes & i)
4226 words += 1;
4227
4228 /* If we have appropriate alignment (and it wouldn't take too many
4229 instructions otherwise), mask out the bytes we need. */
4230 if (TARGET_BWX ? words > 2 : bytes > 0)
4231 {
4232 if (align >= 64)
4233 {
4234 rtx mem, tmp;
4235 HOST_WIDE_INT mask;
4236
4237 mem = adjust_address (orig_dst, DImode, ofs);
4238 set_mem_alias_set (mem, 0);
4239
4240 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4241
4242 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4243 NULL_RTX, 1, OPTAB_WIDEN);
4244
4245 emit_move_insn (mem, tmp);
4246 return 1;
4247 }
4248 else if (align >= 32 && bytes < 4)
4249 {
4250 rtx mem, tmp;
4251 HOST_WIDE_INT mask;
4252
4253 mem = adjust_address (orig_dst, SImode, ofs);
4254 set_mem_alias_set (mem, 0);
4255
4256 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4257
4258 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4259 NULL_RTX, 1, OPTAB_WIDEN);
4260
4261 emit_move_insn (mem, tmp);
4262 return 1;
4263 }
4264 }
4265 #endif
4266
4267 if (!TARGET_BWX && bytes >= 4)
4268 {
4269 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4270 bytes -= 4;
4271 ofs += 4;
4272 }
4273
4274 if (bytes >= 2)
4275 {
4276 if (align >= 16)
4277 {
4278 do {
4279 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4280 const0_rtx);
4281 bytes -= 2;
4282 ofs += 2;
4283 } while (bytes >= 2);
4284 }
4285 else if (! TARGET_BWX)
4286 {
4287 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4288 bytes -= 2;
4289 ofs += 2;
4290 }
4291 }
4292
4293 while (bytes > 0)
4294 {
4295 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4296 bytes -= 1;
4297 ofs += 1;
4298 }
4299
4300 return 1;
4301 }
4302
4303 /* Returns a mask so that zap(x, value) == x & mask. */
4304
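/* Worked example: ZAP clears byte I of its input for every set bit I
   in VALUE, so VALUE == 0x0f clears the low four bytes and the
   equivalent AND mask computed below is 0xffffffff00000000.  */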
4305 rtx
4306 alpha_expand_zap_mask (HOST_WIDE_INT value)
4307 {
4308 rtx result;
4309 int i;
4310
4311 if (HOST_BITS_PER_WIDE_INT >= 64)
4312 {
4313 HOST_WIDE_INT mask = 0;
4314
4315 for (i = 7; i >= 0; --i)
4316 {
4317 mask <<= 8;
4318 if (!((value >> i) & 1))
4319 mask |= 0xff;
4320 }
4321
4322 result = gen_int_mode (mask, DImode);
4323 }
4324 else
4325 {
4326 HOST_WIDE_INT mask_lo = 0, mask_hi = 0;
4327
4328 gcc_assert (HOST_BITS_PER_WIDE_INT == 32);
4329
4330 for (i = 7; i >= 4; --i)
4331 {
4332 mask_hi <<= 8;
4333 if (!((value >> i) & 1))
4334 mask_hi |= 0xff;
4335 }
4336
4337 for (i = 3; i >= 0; --i)
4338 {
4339 mask_lo <<= 8;
4340 if (!((value >> i) & 1))
4341 mask_lo |= 0xff;
4342 }
4343
4344 result = immed_double_const (mask_lo, mask_hi, DImode);
4345 }
4346
4347 return result;
4348 }
4349
4350 void
4351 alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4352 enum machine_mode mode,
4353 rtx op0, rtx op1, rtx op2)
4354 {
4355 op0 = gen_lowpart (mode, op0);
4356
4357 if (op1 == const0_rtx)
4358 op1 = CONST0_RTX (mode);
4359 else
4360 op1 = gen_lowpart (mode, op1);
4361
4362 if (op2 == const0_rtx)
4363 op2 = CONST0_RTX (mode);
4364 else
4365 op2 = gen_lowpart (mode, op2);
4366
4367 emit_insn ((*gen) (op0, op1, op2));
4368 }
4369
4370 /* A subroutine of the atomic operation splitters. Jump to LABEL if
4371 COND is true. Mark the jump as unlikely to be taken. */
4372
4373 static void
4374 emit_unlikely_jump (rtx cond, rtx label)
4375 {
4376 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
4377 rtx x;
4378
4379 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4380 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
4381 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
4382 }
4383
4384 /* A subroutine of the atomic operation splitters. Emit a load-locked
4385 instruction in MODE. */
4386
4387 static void
4388 emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
4389 {
4390 rtx (*fn) (rtx, rtx) = NULL;
4391 if (mode == SImode)
4392 fn = gen_load_locked_si;
4393 else if (mode == DImode)
4394 fn = gen_load_locked_di;
4395 emit_insn (fn (reg, mem));
4396 }
4397
4398 /* A subroutine of the atomic operation splitters. Emit a store-conditional
4399 instruction in MODE. */
4400
4401 static void
4402 emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
4403 {
4404 rtx (*fn) (rtx, rtx, rtx) = NULL;
4405 if (mode == SImode)
4406 fn = gen_store_conditional_si;
4407 else if (mode == DImode)
4408 fn = gen_store_conditional_di;
4409 emit_insn (fn (res, mem, val));
4410 }
4411
4412 /* Subroutines of the atomic operation splitters. Emit barriers
4413 as needed for the memory MODEL. */
4414
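/* For instance, a MEMMODEL_SEQ_CST operation gets an "mb" both before
   and after the ll/sc sequence, while MEMMODEL_RELAXED emits neither;
   this follows from need_atomic_barrier_p (see its definition for the
   acquire/release cases).  */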
4415 static void
4416 alpha_pre_atomic_barrier (enum memmodel model)
4417 {
4418 if (need_atomic_barrier_p (model, true))
4419 emit_insn (gen_memory_barrier ());
4420 }
4421
4422 static void
4423 alpha_post_atomic_barrier (enum memmodel model)
4424 {
4425 if (need_atomic_barrier_p (model, false))
4426 emit_insn (gen_memory_barrier ());
4427 }
4428
4429 /* A subroutine of the atomic operation splitters. Emit an insxl
4430 instruction in MODE. */
4431
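/* E.g. for HImode this emits an inswl, which places OP1's low 16 bits
   in the byte lane selected by the low three bits of the address OP2,
   with the remaining byte lanes of the result zeroed.  */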
4432 static rtx
4433 emit_insxl (enum machine_mode mode, rtx op1, rtx op2)
4434 {
4435 rtx ret = gen_reg_rtx (DImode);
4436 rtx (*fn) (rtx, rtx, rtx);
4437
4438 switch (mode)
4439 {
4440 case QImode:
4441 fn = gen_insbl;
4442 break;
4443 case HImode:
4444 fn = gen_inswl;
4445 break;
4446 case SImode:
4447 fn = gen_insll;
4448 break;
4449 case DImode:
4450 fn = gen_insql;
4451 break;
4452 default:
4453 gcc_unreachable ();
4454 }
4455
4456 op1 = force_reg (mode, op1);
4457 emit_insn (fn (ret, op1, op2));
4458
4459 return ret;
4460 }
4461
4462 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
4463 to perform. MEM is the memory on which to operate. VAL is the second
4464 operand of the binary operator. BEFORE and AFTER are optional locations to
4465 return the value of MEM either before or after the operation. SCRATCH is
4466 a scratch register. */
4467
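/* Roughly, for a DImode fetch-and-add the code below expands to the
   usual Alpha ll/sc retry loop (a sketch; the barriers depend on MODEL
   and the label is generated, not named):

	mb				# pre barrier, if needed
     1:	ldq_l	$scratch,0($mem)	# value BEFORE the operation
	addq	$scratch,$val,$scratch	# the requested operation
	stq_c	$scratch,0($mem)
	beq	$scratch,1b		# retry on sc failure (unlikely)
	mb				# post barrier, if needed
*/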
4468 void
4469 alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4470 rtx after, rtx scratch, enum memmodel model)
4471 {
4472 enum machine_mode mode = GET_MODE (mem);
4473 rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4474
4475 alpha_pre_atomic_barrier (model);
4476
4477 label = gen_label_rtx ();
4478 emit_label (label);
4479 label = gen_rtx_LABEL_REF (DImode, label);
4480
4481 if (before == NULL)
4482 before = scratch;
4483 emit_load_locked (mode, before, mem);
4484
4485 if (code == NOT)
4486 {
4487 x = gen_rtx_AND (mode, before, val);
4488 emit_insn (gen_rtx_SET (VOIDmode, val, x));
4489
4490 x = gen_rtx_NOT (mode, val);
4491 }
4492 else
4493 x = gen_rtx_fmt_ee (code, mode, before, val);
4494 if (after)
4495 emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x)));
4496 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
4497
4498 emit_store_conditional (mode, cond, mem, scratch);
4499
4500 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4501 emit_unlikely_jump (x, label);
4502
4503 alpha_post_atomic_barrier (model);
4504 }
4505
4506 /* Expand a compare and swap operation. */
4507
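/* The strong form of the sequence emitted by the splitter below looks
   roughly like this for DImode (a sketch; the weak form drops the
   retry branch, and barrier placement depends on the memory models):

     1:	ldq_l	$retval,0($mem)
	cmpeq	$retval,$oldval,$cond
	beq	$cond,2f		# values differ: fail (unlikely)
	mov	$newval,$cond
	stq_c	$cond,0($mem)
	beq	$cond,1b		# sc failed: retry (unlikely)
     2:
*/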
4508 void
4509 alpha_split_compare_and_swap (rtx operands[])
4510 {
4511 rtx cond, retval, mem, oldval, newval;
4512 bool is_weak;
4513 enum memmodel mod_s, mod_f;
4514 enum machine_mode mode;
4515 rtx label1, label2, x;
4516
4517 cond = operands[0];
4518 retval = operands[1];
4519 mem = operands[2];
4520 oldval = operands[3];
4521 newval = operands[4];
4522 is_weak = (operands[5] != const0_rtx);
4523 mod_s = (enum memmodel) INTVAL (operands[6]);
4524 mod_f = (enum memmodel) INTVAL (operands[7]);
4525 mode = GET_MODE (mem);
4526
4527 alpha_pre_atomic_barrier (mod_s);
4528
4529 label1 = NULL_RTX;
4530 if (!is_weak)
4531 {
4532 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4533 emit_label (XEXP (label1, 0));
4534 }
4535 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4536
4537 emit_load_locked (mode, retval, mem);
4538
4539 x = gen_lowpart (DImode, retval);
4540 if (oldval == const0_rtx)
4541 {
4542 emit_move_insn (cond, const0_rtx);
4543 x = gen_rtx_NE (DImode, x, const0_rtx);
4544 }
4545 else
4546 {
4547 x = gen_rtx_EQ (DImode, x, oldval);
4548 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4549 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4550 }
4551 emit_unlikely_jump (x, label2);
4552
4553 emit_move_insn (cond, newval);
4554 emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond));
4555
4556 if (!is_weak)
4557 {
4558 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4559 emit_unlikely_jump (x, label1);
4560 }
4561
4562 if (mod_f != MEMMODEL_RELAXED)
4563 emit_label (XEXP (label2, 0));
4564
4565 alpha_post_atomic_barrier (mod_s);
4566
4567 if (mod_f == MEMMODEL_RELAXED)
4568 emit_label (XEXP (label2, 0));
4569 }
4570
4571 void
4572 alpha_expand_compare_and_swap_12 (rtx operands[])
4573 {
4574 rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4575 enum machine_mode mode;
4576 rtx addr, align, wdst;
4577 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
4578
4579 cond = operands[0];
4580 dst = operands[1];
4581 mem = operands[2];
4582 oldval = operands[3];
4583 newval = operands[4];
4584 is_weak = operands[5];
4585 mod_s = operands[6];
4586 mod_f = operands[7];
4587 mode = GET_MODE (mem);
4588
4589 /* We forced the address into a register via mem_noofs_operand. */
4590 addr = XEXP (mem, 0);
4591 gcc_assert (register_operand (addr, DImode));
4592
4593 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4594 NULL_RTX, 1, OPTAB_DIRECT);
4595
4596 oldval = convert_modes (DImode, mode, oldval, 1);
4597
4598 if (newval != const0_rtx)
4599 newval = emit_insxl (mode, newval, addr);
4600
4601 wdst = gen_reg_rtx (DImode);
4602 if (mode == QImode)
4603 gen = gen_atomic_compare_and_swapqi_1;
4604 else
4605 gen = gen_atomic_compare_and_swaphi_1;
4606 emit_insn (gen (cond, wdst, mem, oldval, newval, align,
4607 is_weak, mod_s, mod_f));
4608
4609 emit_move_insn (dst, gen_lowpart (mode, wdst));
4610 }
4611
4612 void
4613 alpha_split_compare_and_swap_12 (rtx operands[])
4614 {
4615 rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4616 enum machine_mode mode;
4617 bool is_weak;
4618 enum memmodel mod_s, mod_f;
4619 rtx label1, label2, mem, addr, width, mask, x;
4620
4621 cond = operands[0];
4622 dest = operands[1];
4623 orig_mem = operands[2];
4624 oldval = operands[3];
4625 newval = operands[4];
4626 align = operands[5];
4627 is_weak = (operands[6] != const0_rtx);
4628 mod_s = (enum memmodel) INTVAL (operands[7]);
4629 mod_f = (enum memmodel) INTVAL (operands[8]);
4630 scratch = operands[9];
4631 mode = GET_MODE (orig_mem);
4632 addr = XEXP (orig_mem, 0);
4633
4634 mem = gen_rtx_MEM (DImode, align);
4635 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4636 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4637 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4638
4639 alpha_pre_atomic_barrier (mod_s);
4640
4641 label1 = NULL_RTX;
4642 if (!is_weak)
4643 {
4644 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4645 emit_label (XEXP (label1, 0));
4646 }
4647 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4648
4649 emit_load_locked (DImode, scratch, mem);
4650
4651 width = GEN_INT (GET_MODE_BITSIZE (mode));
4652 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4653 emit_insn (gen_extxl (dest, scratch, width, addr));
4654
4655 if (oldval == const0_rtx)
4656 {
4657 emit_move_insn (cond, const0_rtx);
4658 x = gen_rtx_NE (DImode, dest, const0_rtx);
4659 }
4660 else
4661 {
4662 x = gen_rtx_EQ (DImode, dest, oldval);
4663 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4664 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4665 }
4666 emit_unlikely_jump (x, label2);
4667
4668 emit_insn (gen_mskxl (cond, scratch, mask, addr));
4669
4670 if (newval != const0_rtx)
4671 emit_insn (gen_iordi3 (cond, cond, newval));
4672
4673 emit_store_conditional (DImode, cond, mem, cond);
4674
4675 if (!is_weak)
4676 {
4677 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4678 emit_unlikely_jump (x, label1);
4679 }
4680
4681 if (mod_f != MEMMODEL_RELAXED)
4682 emit_label (XEXP (label2, 0));
4683
4684 alpha_post_atomic_barrier (mod_s);
4685
4686 if (mod_f == MEMMODEL_RELAXED)
4687 emit_label (XEXP (label2, 0));
4688 }
4689
4690 /* Expand an atomic exchange operation. */
4691
4692 void
4693 alpha_split_atomic_exchange (rtx operands[])
4694 {
4695 rtx retval, mem, val, scratch;
4696 enum memmodel model;
4697 enum machine_mode mode;
4698 rtx label, x, cond;
4699
4700 retval = operands[0];
4701 mem = operands[1];
4702 val = operands[2];
4703 model = (enum memmodel) INTVAL (operands[3]);
4704 scratch = operands[4];
4705 mode = GET_MODE (mem);
4706 cond = gen_lowpart (DImode, scratch);
4707
4708 alpha_pre_atomic_barrier (model);
4709
4710 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4711 emit_label (XEXP (label, 0));
4712
4713 emit_load_locked (mode, retval, mem);
4714 emit_move_insn (scratch, val);
4715 emit_store_conditional (mode, cond, mem, scratch);
4716
4717 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4718 emit_unlikely_jump (x, label);
4719
4720 alpha_post_atomic_barrier (model);
4721 }
4722
4723 void
4724 alpha_expand_atomic_exchange_12 (rtx operands[])
4725 {
4726 rtx dst, mem, val, model;
4727 enum machine_mode mode;
4728 rtx addr, align, wdst;
4729 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
4730
4731 dst = operands[0];
4732 mem = operands[1];
4733 val = operands[2];
4734 model = operands[3];
4735 mode = GET_MODE (mem);
4736
4737 /* We forced the address into a register via mem_noofs_operand. */
4738 addr = XEXP (mem, 0);
4739 gcc_assert (register_operand (addr, DImode));
4740
4741 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4742 NULL_RTX, 1, OPTAB_DIRECT);
4743
4744 /* Insert val into the correct byte location within the word. */
4745 if (val != const0_rtx)
4746 val = emit_insxl (mode, val, addr);
4747
4748 wdst = gen_reg_rtx (DImode);
4749 if (mode == QImode)
4750 gen = gen_atomic_exchangeqi_1;
4751 else
4752 gen = gen_atomic_exchangehi_1;
4753 emit_insn (gen (wdst, mem, val, align, model));
4754
4755 emit_move_insn (dst, gen_lowpart (mode, wdst));
4756 }
4757
4758 void
4759 alpha_split_atomic_exchange_12 (rtx operands[])
4760 {
4761 rtx dest, orig_mem, addr, val, align, scratch;
4762 rtx label, mem, width, mask, x;
4763 enum machine_mode mode;
4764 enum memmodel model;
4765
4766 dest = operands[0];
4767 orig_mem = operands[1];
4768 val = operands[2];
4769 align = operands[3];
4770 model = (enum memmodel) INTVAL (operands[4]);
4771 scratch = operands[5];
4772 mode = GET_MODE (orig_mem);
4773 addr = XEXP (orig_mem, 0);
4774
4775 mem = gen_rtx_MEM (DImode, align);
4776 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4777 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4778 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4779
4780 alpha_pre_atomic_barrier (model);
4781
4782 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4783 emit_label (XEXP (label, 0));
4784
4785 emit_load_locked (DImode, scratch, mem);
4786
4787 width = GEN_INT (GET_MODE_BITSIZE (mode));
4788 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4789 emit_insn (gen_extxl (dest, scratch, width, addr));
4790 emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4791 if (val != const0_rtx)
4792 emit_insn (gen_iordi3 (scratch, scratch, val));
4793
4794 emit_store_conditional (DImode, scratch, mem, scratch);
4795
4796 x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4797 emit_unlikely_jump (x, label);
4798
4799 alpha_post_atomic_barrier (model);
4800 }
4801 \f
4802 /* Adjust the cost of a scheduling dependency. Return the new cost of
4803 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4804
4805 static int
4806 alpha_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4807 {
4808 enum attr_type dep_insn_type;
4809
4810 /* If the dependence is an anti-dependence, there is no cost. For an
4811 output dependence, there is sometimes a cost, but it doesn't seem
4812 worth handling those few cases. */
4813 if (REG_NOTE_KIND (link) != 0)
4814 return cost;
4815
4816 /* If we can't recognize the insns, we can't really do anything. */
4817 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4818 return cost;
4819
4820 dep_insn_type = get_attr_type (dep_insn);
4821
4822 /* Bring in the user-defined memory latency. */
4823 if (dep_insn_type == TYPE_ILD
4824 || dep_insn_type == TYPE_FLD
4825 || dep_insn_type == TYPE_LDSYM)
4826 cost += alpha_memory_latency-1;
4827
4828 /* Everything else handled in DFA bypasses now. */
4829
4830 return cost;
4831 }
4832
4833 /* The number of instructions that can be issued per cycle. */
4834
4835 static int
4836 alpha_issue_rate (void)
4837 {
4838 return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4839 }
4840
4841 /* How many alternative schedules to try. This should be as wide as the
4842 scheduling freedom in the DFA, but no wider. Making this value too
4843 large results in extra work for the scheduler.
4844
4845 For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4846 alternative schedules. For EV5, we can choose between E0/E1 and
4847 FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
4848
4849 static int
4850 alpha_multipass_dfa_lookahead (void)
4851 {
4852 return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4853 }
4854 \f
4855 /* Machine-specific function data. */
4856
4857 struct GTY(()) alpha_links;
4858
4859 struct GTY(()) machine_function
4860 {
4861 /* For OSF. */
4862 const char *some_ld_name;
4863
4864 /* For flag_reorder_blocks_and_partition. */
4865 rtx gp_save_rtx;
4866
4867 /* For VMS condition handlers. */
4868 bool uses_condition_handler;
4869
4870 /* Linkage entries. */
4871 splay_tree GTY ((param1_is (char *), param2_is (struct alpha_links *)))
4872 links;
4873 };
4874
4875 /* How to allocate a 'struct machine_function'. */
4876
4877 static struct machine_function *
4878 alpha_init_machine_status (void)
4879 {
4880 return ggc_cleared_alloc<machine_function> ();
4881 }
4882
4883 /* Support for frame based VMS condition handlers. */
4884
4885 /* A VMS condition handler may be established for a function with a call to
4886 __builtin_establish_vms_condition_handler, and cancelled with a call to
4887 __builtin_revert_vms_condition_handler.
4888
4889 The VMS Condition Handling Facility knows about the existence of a handler
4890 from the procedure descriptor .handler field. Like the VMS native compilers,
4891 we store the user-specified handler's address at a fixed location in the
4892 stack frame and point the procedure descriptor at a common wrapper which
4893 fetches the real handler's address and issues an indirect call.
4894
4895 The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
4896
4897 We force the procedure kind to PT_STACK, and the fixed frame location is
4898 fp+8, just before the register save area. We use the handler_data field in
4899 the procedure descriptor to state the fp offset at which the installed
4900 handler address can be found. */
4901
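/* A minimal usage sketch (hypothetical user code; the builtin
   prototypes and the handler body are assumptions made for
   illustration, not definitions taken from this file):

     static long
     my_handler (void *sigargs, void *mechargs)
     {
       /* Examine sigargs/mechargs and decide to resignal, unwind, ...  */
       return 0;
     }

     void
     guarded (void)
     {
       void *prev
	 = __builtin_establish_vms_condition_handler (my_handler);
       ...
       __builtin_revert_vms_condition_handler ();
     }
*/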
4902 #define VMS_COND_HANDLER_FP_OFFSET 8
4903
4904 /* Expand code to store the currently installed user VMS condition handler
4905 into TARGET and install HANDLER as the new condition handler. */
4906
4907 void
4908 alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
4909 {
4910 rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
4911 VMS_COND_HANDLER_FP_OFFSET);
4912
4913 rtx handler_slot
4914 = gen_rtx_MEM (DImode, handler_slot_address);
4915
4916 emit_move_insn (target, handler_slot);
4917 emit_move_insn (handler_slot, handler);
4918
4919 /* Notify the start/prologue/epilogue emitters that the condition handler
4920 slot is needed. In addition to reserving the slot space, this will force
4921 the procedure kind to PT_STACK, ensuring that the hard_frame_pointer_rtx
4922 use above is correct. */
4923 cfun->machine->uses_condition_handler = true;
4924 }
4925
4926 /* Expand code to store the current VMS condition handler into TARGET and
4927 nullify it. */
4928
4929 void
4930 alpha_expand_builtin_revert_vms_condition_handler (rtx target)
4931 {
4932 /* We implement this by establishing a null condition handler, with the tiny
4933 side effect of setting uses_condition_handler. This is a little bit
4934 pessimistic if no actual builtin_establish call is ever issued, which is
4935 not a real problem and is expected never to happen anyway. */
4936
4937 alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
4938 }
4939
4940 /* Functions to save and restore alpha_return_addr_rtx. */
4941
4942 /* Start the ball rolling with RETURN_ADDR_RTX. */
4943
4944 rtx
4945 alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4946 {
4947 if (count != 0)
4948 return const0_rtx;
4949
4950 return get_hard_reg_initial_val (Pmode, REG_RA);
4951 }
4952
4953 /* Return or create a memory slot containing the gp value for the current
4954 function. Needed only if TARGET_LD_BUGGY_LDGP. */
4955
4956 rtx
4957 alpha_gp_save_rtx (void)
4958 {
4959 rtx_insn *seq;
4960 rtx m = cfun->machine->gp_save_rtx;
4961
4962 if (m == NULL)
4963 {
4964 start_sequence ();
4965
4966 m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4967 m = validize_mem (m);
4968 emit_move_insn (m, pic_offset_table_rtx);
4969
4970 seq = get_insns ();
4971 end_sequence ();
4972
4973 /* We used to simply emit the sequence after entry_of_function.
4974 However this breaks the CFG if the first instruction in the
4975 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
4976 label. Emit the sequence properly on the edge. We are only
4977 invoked from dw2_build_landing_pads and finish_eh_generation
4978 will call commit_edge_insertions thanks to a kludge. */
4979 insert_insn_on_edge (seq,
4980 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
4981
4982 cfun->machine->gp_save_rtx = m;
4983 }
4984
4985 return m;
4986 }
4987
4988 static void
4989 alpha_instantiate_decls (void)
4990 {
4991 if (cfun->machine->gp_save_rtx != NULL_RTX)
4992 instantiate_decl_rtl (cfun->machine->gp_save_rtx);
4993 }
4994
4995 static int
4996 alpha_ra_ever_killed (void)
4997 {
4998 rtx_insn *top;
4999
5000 if (!has_hard_reg_initial_val (Pmode, REG_RA))
5001 return (int)df_regs_ever_live_p (REG_RA);
5002
5003 push_topmost_sequence ();
5004 top = get_insns ();
5005 pop_topmost_sequence ();
5006
5007 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX);
5008 }
5009
5010 \f
5011 /* Return the trap mode suffix applicable to the current
5012 instruction, or NULL. */
5013
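/* For example, with -mfp-trap-mode=su an insn whose trap_suffix
   attribute is u_su_sui is printed with an "/su" qualifier by the %/
   operand code below, e.g. "addt/su".  */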
5014 static const char *
5015 get_trap_mode_suffix (void)
5016 {
5017 enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
5018
5019 switch (s)
5020 {
5021 case TRAP_SUFFIX_NONE:
5022 return NULL;
5023
5024 case TRAP_SUFFIX_SU:
5025 if (alpha_fptm >= ALPHA_FPTM_SU)
5026 return "su";
5027 return NULL;
5028
5029 case TRAP_SUFFIX_SUI:
5030 if (alpha_fptm >= ALPHA_FPTM_SUI)
5031 return "sui";
5032 return NULL;
5033
5034 case TRAP_SUFFIX_V_SV:
5035 switch (alpha_fptm)
5036 {
5037 case ALPHA_FPTM_N:
5038 return NULL;
5039 case ALPHA_FPTM_U:
5040 return "v";
5041 case ALPHA_FPTM_SU:
5042 case ALPHA_FPTM_SUI:
5043 return "sv";
5044 default:
5045 gcc_unreachable ();
5046 }
5047
5048 case TRAP_SUFFIX_V_SV_SVI:
5049 switch (alpha_fptm)
5050 {
5051 case ALPHA_FPTM_N:
5052 return NULL;
5053 case ALPHA_FPTM_U:
5054 return "v";
5055 case ALPHA_FPTM_SU:
5056 return "sv";
5057 case ALPHA_FPTM_SUI:
5058 return "svi";
5059 default:
5060 gcc_unreachable ();
5061 }
5062 break;
5063
5064 case TRAP_SUFFIX_U_SU_SUI:
5065 switch (alpha_fptm)
5066 {
5067 case ALPHA_FPTM_N:
5068 return NULL;
5069 case ALPHA_FPTM_U:
5070 return "u";
5071 case ALPHA_FPTM_SU:
5072 return "su";
5073 case ALPHA_FPTM_SUI:
5074 return "sui";
5075 default:
5076 gcc_unreachable ();
5077 }
5078 break;
5079
5080 default:
5081 gcc_unreachable ();
5082 }
5083 gcc_unreachable ();
5084 }
5085
5086 /* Return the rounding mode suffix applicable to the current
5087 instruction, or NULL. */
5088
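/* For example, with -mfp-rounding-mode=d a normally-rounded insn gets
   the dynamic-rounding qualifier, so %/ prints "/d" (or "/sud" when
   combined with a trap suffix, since the trap suffix comes first).  */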
5089 static const char *
5090 get_round_mode_suffix (void)
5091 {
5092 enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
5093
5094 switch (s)
5095 {
5096 case ROUND_SUFFIX_NONE:
5097 return NULL;
5098 case ROUND_SUFFIX_NORMAL:
5099 switch (alpha_fprm)
5100 {
5101 case ALPHA_FPRM_NORM:
5102 return NULL;
5103 case ALPHA_FPRM_MINF:
5104 return "m";
5105 case ALPHA_FPRM_CHOP:
5106 return "c";
5107 case ALPHA_FPRM_DYN:
5108 return "d";
5109 default:
5110 gcc_unreachable ();
5111 }
5112 break;
5113
5114 case ROUND_SUFFIX_C:
5115 return "c";
5116
5117 default:
5118 gcc_unreachable ();
5119 }
5120 gcc_unreachable ();
5121 }
5122
5123 /* Locate some local-dynamic symbol still in use by this function
5124 so that we can print its name in some movdi_er_tlsldm pattern. */
5125
5126 static int
5127 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
5128 {
5129 rtx x = *px;
5130
5131 if (GET_CODE (x) == SYMBOL_REF
5132 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
5133 {
5134 cfun->machine->some_ld_name = XSTR (x, 0);
5135 return 1;
5136 }
5137
5138 return 0;
5139 }
5140
5141 static const char *
5142 get_some_local_dynamic_name (void)
5143 {
5144 rtx_insn *insn;
5145
5146 if (cfun->machine->some_ld_name)
5147 return cfun->machine->some_ld_name;
5148
5149 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
5150 if (INSN_P (insn)
5151 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
5152 return cfun->machine->some_ld_name;
5153
5154 gcc_unreachable ();
5155 }
5156
5157 /* Print an operand. Recognize special options, documented below. */
5158
5159 void
5160 print_operand (FILE *file, rtx x, int code)
5161 {
5162 int i;
5163
5164 switch (code)
5165 {
5166 case '~':
5167 /* Print the assembler name of the current function. */
5168 assemble_name (file, alpha_fnname);
5169 break;
5170
5171 case '&':
5172 assemble_name (file, get_some_local_dynamic_name ());
5173 break;
5174
5175 case '/':
5176 {
5177 const char *trap = get_trap_mode_suffix ();
5178 const char *round = get_round_mode_suffix ();
5179
5180 if (trap || round)
5181 fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : ""));
5182 break;
5183 }
5184
5185 case ',':
5186 /* Generates single precision instruction suffix. */
5187 fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
5188 break;
5189
5190 case '-':
5191 /* Generates double precision instruction suffix. */
5192 fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5193 break;
5194
5195 case '#':
5196 if (alpha_this_literal_sequence_number == 0)
5197 alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5198 fprintf (file, "%d", alpha_this_literal_sequence_number);
5199 break;
5200
5201 case '*':
5202 if (alpha_this_gpdisp_sequence_number == 0)
5203 alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5204 fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5205 break;
5206
5207 case 'H':
5208 if (GET_CODE (x) == HIGH)
5209 output_addr_const (file, XEXP (x, 0));
5210 else
5211 output_operand_lossage ("invalid %%H value");
5212 break;
5213
5214 case 'J':
5215 {
5216 const char *lituse;
5217
5218 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5219 {
5220 x = XVECEXP (x, 0, 0);
5221 lituse = "lituse_tlsgd";
5222 }
5223 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5224 {
5225 x = XVECEXP (x, 0, 0);
5226 lituse = "lituse_tlsldm";
5227 }
5228 else if (CONST_INT_P (x))
5229 lituse = "lituse_jsr";
5230 else
5231 {
5232 output_operand_lossage ("invalid %%J value");
5233 break;
5234 }
5235
5236 if (x != const0_rtx)
5237 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5238 }
5239 break;
5240
5241 case 'j':
5242 {
5243 const char *lituse;
5244
5245 #ifdef HAVE_AS_JSRDIRECT_RELOCS
5246 lituse = "lituse_jsrdirect";
5247 #else
5248 lituse = "lituse_jsr";
5249 #endif
5250
5251 gcc_assert (INTVAL (x) != 0);
5252 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5253 }
5254 break;
5255 case 'r':
5256 /* If this operand is the constant zero, write it as "$31". */
5257 if (REG_P (x))
5258 fprintf (file, "%s", reg_names[REGNO (x)]);
5259 else if (x == CONST0_RTX (GET_MODE (x)))
5260 fprintf (file, "$31");
5261 else
5262 output_operand_lossage ("invalid %%r value");
5263 break;
5264
5265 case 'R':
5266 /* Similar, but for floating-point. */
5267 if (REG_P (x))
5268 fprintf (file, "%s", reg_names[REGNO (x)]);
5269 else if (x == CONST0_RTX (GET_MODE (x)))
5270 fprintf (file, "$f31");
5271 else
5272 output_operand_lossage ("invalid %%R value");
5273 break;
5274
5275 case 'N':
5276 /* Write the 1's complement of a constant. */
5277 if (!CONST_INT_P (x))
5278 output_operand_lossage ("invalid %%N value");
5279
5280 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5281 break;
5282
5283 case 'P':
5284 /* Write 1 << C, for a constant C. */
5285 if (!CONST_INT_P (x))
5286 output_operand_lossage ("invalid %%P value");
5287
5288 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x));
5289 break;
5290
5291 case 'h':
5292 /* Write the high-order 16 bits of a constant, sign-extended. */
5293 if (!CONST_INT_P (x))
5294 output_operand_lossage ("invalid %%h value");
5295
5296 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5297 break;
5298
5299 case 'L':
5300 /* Write the low-order 16 bits of a constant, sign-extended. */
5301 if (!CONST_INT_P (x))
5302 output_operand_lossage ("invalid %%L value");
5303
5304 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5305 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5306 break;
5307
5308 case 'm':
5309 /* Write mask for ZAP insn. */
5310 if (GET_CODE (x) == CONST_DOUBLE)
5311 {
5312 HOST_WIDE_INT mask = 0;
5313 HOST_WIDE_INT value;
5314
5315 value = CONST_DOUBLE_LOW (x);
5316 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5317 i++, value >>= 8)
5318 if (value & 0xff)
5319 mask |= (1 << i);
5320
5321 value = CONST_DOUBLE_HIGH (x);
5322 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5323 i++, value >>= 8)
5324 if (value & 0xff)
5325 mask |= (1 << (i + sizeof (int)));
5326
5327 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff);
5328 }
5329
5330 else if (CONST_INT_P (x))
5331 {
5332 HOST_WIDE_INT mask = 0, value = INTVAL (x);
5333
5334 for (i = 0; i < 8; i++, value >>= 8)
5335 if (value & 0xff)
5336 mask |= (1 << i);
5337
5338 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5339 }
5340 else
5341 output_operand_lossage ("invalid %%m value");
5342 break;
5343
5344 case 'M':
5345 /* 'b', 'w', 'l', or 'q' as the value of the constant. */
5346 if (!CONST_INT_P (x)
5347 || (INTVAL (x) != 8 && INTVAL (x) != 16
5348 && INTVAL (x) != 32 && INTVAL (x) != 64))
5349 output_operand_lossage ("invalid %%M value");
5350
5351 fprintf (file, "%s",
5352 (INTVAL (x) == 8 ? "b"
5353 : INTVAL (x) == 16 ? "w"
5354 : INTVAL (x) == 32 ? "l"
5355 : "q"));
5356 break;
5357
5358 case 'U':
5359 /* Similar, except do it from the mask. */
5360 if (CONST_INT_P (x))
5361 {
5362 HOST_WIDE_INT value = INTVAL (x);
5363
5364 if (value == 0xff)
5365 {
5366 fputc ('b', file);
5367 break;
5368 }
5369 if (value == 0xffff)
5370 {
5371 fputc ('w', file);
5372 break;
5373 }
5374 if (value == 0xffffffff)
5375 {
5376 fputc ('l', file);
5377 break;
5378 }
5379 if (value == -1)
5380 {
5381 fputc ('q', file);
5382 break;
5383 }
5384 }
5385 else if (HOST_BITS_PER_WIDE_INT == 32
5386 && GET_CODE (x) == CONST_DOUBLE
5387 && CONST_DOUBLE_LOW (x) == 0xffffffff
5388 && CONST_DOUBLE_HIGH (x) == 0)
5389 {
5390 fputc ('l', file);
5391 break;
5392 }
5393 output_operand_lossage ("invalid %%U value");
5394 break;
5395
5396 case 's':
5397 /* Write the constant value divided by 8. */
5398 if (!CONST_INT_P (x)
5399 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5400 || (INTVAL (x) & 7) != 0)
5401 output_operand_lossage ("invalid %%s value");
5402
5403 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
5404 break;
5405
5406 case 'S':
5407 /* Same, except compute (64 - c) / 8 */
5408
5409 if (!CONST_INT_P (x)
5410 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5411 || (INTVAL (x) & 7) != 0)
5412 output_operand_lossage ("invalid %%S value");
5413
5414 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
5415 break;
5416
5417 case 'C': case 'D': case 'c': case 'd':
5418 /* Write out comparison name. */
5419 {
5420 enum rtx_code c = GET_CODE (x);
5421
5422 if (!COMPARISON_P (x))
5423 output_operand_lossage ("invalid %%C value");
5424
5425 else if (code == 'D')
5426 c = reverse_condition (c);
5427 else if (code == 'c')
5428 c = swap_condition (c);
5429 else if (code == 'd')
5430 c = swap_condition (reverse_condition (c));
5431
5432 if (c == LEU)
5433 fprintf (file, "ule");
5434 else if (c == LTU)
5435 fprintf (file, "ult");
5436 else if (c == UNORDERED)
5437 fprintf (file, "un");
5438 else
5439 fprintf (file, "%s", GET_RTX_NAME (c));
5440 }
5441 break;
5442
5443 case 'E':
5444 /* Write the divide or modulus operator. */
5445 switch (GET_CODE (x))
5446 {
5447 case DIV:
5448 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5449 break;
5450 case UDIV:
5451 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5452 break;
5453 case MOD:
5454 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5455 break;
5456 case UMOD:
5457 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5458 break;
5459 default:
5460 output_operand_lossage ("invalid %%E value");
5461 break;
5462 }
5463 break;
5464
5465 case 'A':
5466 /* Write "_u" for unaligned access. */
5467 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
5468 fprintf (file, "_u");
5469 break;
5470
5471 case 0:
5472 if (REG_P (x))
5473 fprintf (file, "%s", reg_names[REGNO (x)]);
5474 else if (MEM_P (x))
5475 output_address (XEXP (x, 0));
5476 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5477 {
5478 switch (XINT (XEXP (x, 0), 1))
5479 {
5480 case UNSPEC_DTPREL:
5481 case UNSPEC_TPREL:
5482 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5483 break;
5484 default:
5485 output_operand_lossage ("unknown relocation unspec");
5486 break;
5487 }
5488 }
5489 else
5490 output_addr_const (file, x);
5491 break;
5492
5493 default:
5494 output_operand_lossage ("invalid %%xn code");
5495 }
5496 }
5497
5498 void
5499 print_operand_address (FILE *file, rtx addr)
5500 {
5501 int basereg = 31;
5502 HOST_WIDE_INT offset = 0;
5503
5504 if (GET_CODE (addr) == AND)
5505 addr = XEXP (addr, 0);
5506
5507 if (GET_CODE (addr) == PLUS
5508 && CONST_INT_P (XEXP (addr, 1)))
5509 {
5510 offset = INTVAL (XEXP (addr, 1));
5511 addr = XEXP (addr, 0);
5512 }
5513
5514 if (GET_CODE (addr) == LO_SUM)
5515 {
5516 const char *reloc16, *reloclo;
5517 rtx op1 = XEXP (addr, 1);
5518
5519 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5520 {
5521 op1 = XEXP (op1, 0);
5522 switch (XINT (op1, 1))
5523 {
5524 case UNSPEC_DTPREL:
5525 reloc16 = NULL;
5526 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5527 break;
5528 case UNSPEC_TPREL:
5529 reloc16 = NULL;
5530 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5531 break;
5532 default:
5533 output_operand_lossage ("unknown relocation unspec");
5534 return;
5535 }
5536
5537 output_addr_const (file, XVECEXP (op1, 0, 0));
5538 }
5539 else
5540 {
5541 reloc16 = "gprel";
5542 reloclo = "gprellow";
5543 output_addr_const (file, op1);
5544 }
5545
5546 if (offset)
5547 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5548
5549 addr = XEXP (addr, 0);
5550 switch (GET_CODE (addr))
5551 {
5552 case REG:
5553 basereg = REGNO (addr);
5554 break;
5555
5556 case SUBREG:
5557 basereg = subreg_regno (addr);
5558 break;
5559
5560 default:
5561 gcc_unreachable ();
5562 }
5563
5564 fprintf (file, "($%d)\t\t!%s", basereg,
5565 (basereg == 29 ? reloc16 : reloclo));
5566 return;
5567 }
5568
5569 switch (GET_CODE (addr))
5570 {
5571 case REG:
5572 basereg = REGNO (addr);
5573 break;
5574
5575 case SUBREG:
5576 basereg = subreg_regno (addr);
5577 break;
5578
5579 case CONST_INT:
5580 offset = INTVAL (addr);
5581 break;
5582
5583 case SYMBOL_REF:
5584 gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5585 fprintf (file, "%s", XSTR (addr, 0));
5586 return;
5587
5588 case CONST:
5589 gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5590 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5591 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5592 fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5593 XSTR (XEXP (XEXP (addr, 0), 0), 0),
5594 INTVAL (XEXP (XEXP (addr, 0), 1)));
5595 return;
5596
5597 default:
5598 output_operand_lossage ("invalid operand address");
5599 return;
5600 }
5601
5602 fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5603 }
5604 \f
5605 /* Emit RTL insns to initialize the variable parts of a trampoline at
5606 M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx
5607 for the static chain value for the function. */
5608
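/* For the OSF case handled below, the 32-byte trampoline ends up laid
   out as (a sketch of the result, offsets in bytes):

	 0:  ldq $1,24($27)		# load static chain
	 4:  ldq $27,16($27)		# load real function address
	 8:  jmp $31,($27),0
	12:  nop
	16:  <address of FNDECL>
	24:  <CHAIN_VALUE>

   $27 holds the trampoline's own address at the call, per the OSF
   calling convention.  */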
5609 static void
5610 alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5611 {
5612 rtx fnaddr, mem, word1, word2;
5613
5614 fnaddr = XEXP (DECL_RTL (fndecl), 0);
5615
5616 #ifdef POINTERS_EXTEND_UNSIGNED
5617 fnaddr = convert_memory_address (Pmode, fnaddr);
5618 chain_value = convert_memory_address (Pmode, chain_value);
5619 #endif
5620
5621 if (TARGET_ABI_OPEN_VMS)
5622 {
5623 const char *fnname;
5624 char *trname;
5625
5626 /* Construct the name of the trampoline entry point. */
5627 fnname = XSTR (fnaddr, 0);
5628 trname = (char *) alloca (strlen (fnname) + 5);
5629 strcpy (trname, fnname);
5630 strcat (trname, "..tr");
5631 fnname = ggc_alloc_string (trname, strlen (trname) + 1);
5632 word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
5633
5634 /* Trampoline (or "bounded") procedure descriptor is constructed from
5635 the function's procedure descriptor with certain fields zeroed in accordance with
5636 the VMS calling standard. This is stored in the first quadword. */
5637 word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
5638 word1 = expand_and (DImode, word1,
5639 GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)),
5640 NULL);
5641 }
5642 else
5643 {
5644 /* These 4 instructions are:
5645 ldq $1,24($27)
5646 ldq $27,16($27)
5647 jmp $31,($27),0
5648 nop
5649 We don't bother setting the HINT field of the jump; the nop
5650 is merely there for padding. */
5651 word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018));
5652 word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000));
5653 }
5654
5655 /* Store the first two words, as computed above. */
5656 mem = adjust_address (m_tramp, DImode, 0);
5657 emit_move_insn (mem, word1);
5658 mem = adjust_address (m_tramp, DImode, 8);
5659 emit_move_insn (mem, word2);
5660
5661 /* Store function address and static chain value. */
5662 mem = adjust_address (m_tramp, Pmode, 16);
5663 emit_move_insn (mem, fnaddr);
5664 mem = adjust_address (m_tramp, Pmode, 24);
5665 emit_move_insn (mem, chain_value);
5666
5667 if (TARGET_ABI_OSF)
5668 {
5669 emit_insn (gen_imb ());
5670 #ifdef HAVE_ENABLE_EXECUTE_STACK
5671 emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5672 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
5673 #endif
5674 }
5675 }
5676 \f
5677 /* Determine where to put an argument to a function.
5678 Value is zero to push the argument on the stack,
5679 or a hard register in which to store the argument.
5680
5681 MODE is the argument's machine mode.
5682 TYPE is the data type of the argument (as a tree).
5683 This is null for libcalls where that information may
5684 not be available.
5685 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5686 the preceding args and about the function being called.
5687 NAMED is nonzero if this argument is a named parameter
5688 (otherwise it is an extra parameter matching an ellipsis).
5689
5690 On Alpha the first 6 words of args are normally in registers
5691 and the rest are pushed. */
5692
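/* For instance, under the OSF rules implemented here a call such as
   f (int a, double b, int c) passes a in $16, b in $f17 and c in $18
   (one slot per argument, with the integer or FP register bank chosen
   by the argument's type); the seventh and later words go on the
   stack.  */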
5693 static rtx
5694 alpha_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
5695 const_tree type, bool named ATTRIBUTE_UNUSED)
5696 {
5697 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5698 int basereg;
5699 int num_args;
5700
5701 /* Don't get confused and pass small structures in FP registers. */
5702 if (type && AGGREGATE_TYPE_P (type))
5703 basereg = 16;
5704 else
5705 {
5706 #ifdef ENABLE_CHECKING
5707 /* With alpha_split_complex_arg, we shouldn't see any raw complex
5708 values here. */
5709 gcc_assert (!COMPLEX_MODE_P (mode));
5710 #endif
5711
5712 /* Set up defaults for FP operands passed in FP registers, and
5713 integral operands passed in integer registers. */
5714 if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT)
5715 basereg = 32 + 16;
5716 else
5717 basereg = 16;
5718 }
5719
5720 /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5721 the two platforms, so we can't avoid conditional compilation. */
5722 #if TARGET_ABI_OPEN_VMS
5723 {
5724 if (mode == VOIDmode)
5725 return alpha_arg_info_reg_val (*cum);
5726
5727 num_args = cum->num_args;
5728 if (num_args >= 6
5729 || targetm.calls.must_pass_in_stack (mode, type))
5730 return NULL_RTX;
5731 }
5732 #elif TARGET_ABI_OSF
5733 {
5734 if (*cum >= 6)
5735 return NULL_RTX;
5736 num_args = *cum;
5737
5738 /* VOID is passed as a special flag for "last argument". */
5739 if (type == void_type_node)
5740 basereg = 16;
5741 else if (targetm.calls.must_pass_in_stack (mode, type))
5742 return NULL_RTX;
5743 }
5744 #else
5745 #error Unhandled ABI
5746 #endif
5747
5748 return gen_rtx_REG (mode, num_args + basereg);
5749 }
5750
5751 /* Update the data in CUM to advance over an argument
5752 of mode MODE and data type TYPE.
5753 (TYPE is null for libcalls where that information may not be available.) */
5754
5755 static void
5756 alpha_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
5757 const_tree type, bool named ATTRIBUTE_UNUSED)
5758 {
5759 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5760 bool onstack = targetm.calls.must_pass_in_stack (mode, type);
5761 int increment = onstack ? 6 : ALPHA_ARG_SIZE (mode, type, named);
5762
5763 #if TARGET_ABI_OSF
5764 *cum += increment;
5765 #else
5766 if (!onstack && cum->num_args < 6)
5767 cum->atypes[cum->num_args] = alpha_arg_type (mode);
5768 cum->num_args += increment;
5769 #endif
5770 }
5771
5772 static int
5773 alpha_arg_partial_bytes (cumulative_args_t cum_v,
5774 enum machine_mode mode ATTRIBUTE_UNUSED,
5775 tree type ATTRIBUTE_UNUSED,
5776 bool named ATTRIBUTE_UNUSED)
5777 {
5778 int words = 0;
5779 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);
5780
5781 #if TARGET_ABI_OPEN_VMS
5782 if (cum->num_args < 6
5783 && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named))
5784 words = 6 - cum->num_args;
5785 #elif TARGET_ABI_OSF
5786 if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named))
5787 words = 6 - *cum;
5788 #else
5789 #error Unhandled ABI
5790 #endif
5791
5792 return words * UNITS_PER_WORD;
5793 }
5794
5795
5796 /* Return true if TYPE must be returned in memory, instead of in registers. */
5797
5798 static bool
5799 alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5800 {
5801 enum machine_mode mode = VOIDmode;
5802 int size;
5803
5804 if (type)
5805 {
5806 mode = TYPE_MODE (type);
5807
5808 /* All aggregates are returned in memory, except on OpenVMS where
5809 records that fit 64 bits should be returned by immediate value
5810 as required by section 3.8.7.1 of the OpenVMS Calling Standard. */
5811 if (TARGET_ABI_OPEN_VMS
5812 && TREE_CODE (type) != ARRAY_TYPE
5813 && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8)
5814 return false;
5815
5816 if (AGGREGATE_TYPE_P (type))
5817 return true;
5818 }
5819
5820 size = GET_MODE_SIZE (mode);
5821 switch (GET_MODE_CLASS (mode))
5822 {
5823 case MODE_VECTOR_FLOAT:
5824 /* Pass all float vectors in memory, like an aggregate. */
5825 return true;
5826
5827 case MODE_COMPLEX_FLOAT:
5828 /* We judge complex floats on the size of their element,
5829 not the size of the whole type. */
5830 size = GET_MODE_UNIT_SIZE (mode);
5831 break;
5832
5833 case MODE_INT:
5834 case MODE_FLOAT:
5835 case MODE_COMPLEX_INT:
5836 case MODE_VECTOR_INT:
5837 break;
5838
5839 default:
5840 /* ??? We get called on all sorts of random stuff from
5841 aggregate_value_p. We must return something, but it's not
5842 clear what's safe to return. Pretend it's a struct I
5843 guess. */
5844 return true;
5845 }
5846
5847 /* Otherwise types must fit in one register. */
5848 return size > UNITS_PER_WORD;
5849 }
5850
5851 /* Return true if TYPE should be passed by invisible reference. */
5852
5853 static bool
5854 alpha_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
5855 enum machine_mode mode,
5856 const_tree type ATTRIBUTE_UNUSED,
5857 bool named ATTRIBUTE_UNUSED)
5858 {
5859 return mode == TFmode || mode == TCmode;
5860 }
5861
5862 /* Define how to find the value returned by a function. VALTYPE is the
5863 data type of the value (as a tree). If the precise function being
5864 called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5865 MODE is set instead of VALTYPE for libcalls.
5866
5867 On Alpha the value is found in $0 for integer functions and
5868 $f0 for floating-point functions. */
5869
5870 rtx
5871 function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
5872 enum machine_mode mode)
5873 {
5874 unsigned int regnum, dummy ATTRIBUTE_UNUSED;
5875 enum mode_class mclass;
5876
5877 gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5878
5879 if (valtype)
5880 mode = TYPE_MODE (valtype);
5881
5882 mclass = GET_MODE_CLASS (mode);
5883 switch (mclass)
5884 {
5885 case MODE_INT:
5886 /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
5887 where we have them returning both SImode and DImode. */
5888 if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
5889 PROMOTE_MODE (mode, dummy, valtype);
5890 /* FALLTHRU */
5891
5892 case MODE_COMPLEX_INT:
5893 case MODE_VECTOR_INT:
5894 regnum = 0;
5895 break;
5896
5897 case MODE_FLOAT:
5898 regnum = 32;
5899 break;
5900
5901 case MODE_COMPLEX_FLOAT:
5902 {
5903 enum machine_mode cmode = GET_MODE_INNER (mode);
5904
5905 return gen_rtx_PARALLEL
5906 (VOIDmode,
5907 gen_rtvec (2,
5908 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5909 const0_rtx),
5910 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5911 GEN_INT (GET_MODE_SIZE (cmode)))));
5912 }
5913
5914 case MODE_RANDOM:
5915 /* We should only reach here for BLKmode on VMS. */
5916 gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
5917 regnum = 0;
5918 break;
5919
5920 default:
5921 gcc_unreachable ();
5922 }
5923
5924 return gen_rtx_REG (mode, regnum);
5925 }
5926
5927 /* TCmode complex values are passed by invisible reference. We
5928 should not split these values. */
5929
5930 static bool
5931 alpha_split_complex_arg (const_tree type)
5932 {
5933 return TYPE_MODE (type) != TCmode;
5934 }
5935
5936 static tree
5937 alpha_build_builtin_va_list (void)
5938 {
5939 tree base, ofs, space, record, type_decl;
5940
5941 if (TARGET_ABI_OPEN_VMS)
5942 return ptr_type_node;
5943
5944 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5945 type_decl = build_decl (BUILTINS_LOCATION,
5946 TYPE_DECL, get_identifier ("__va_list_tag"), record);
5947 TYPE_STUB_DECL (record) = type_decl;
5948 TYPE_NAME (record) = type_decl;
5949
5950 /* C++? SET_IS_AGGR_TYPE (record, 1); */
5951
5952 /* Dummy field to prevent alignment warnings. */
5953 space = build_decl (BUILTINS_LOCATION,
5954 FIELD_DECL, NULL_TREE, integer_type_node);
5955 DECL_FIELD_CONTEXT (space) = record;
5956 DECL_ARTIFICIAL (space) = 1;
5957 DECL_IGNORED_P (space) = 1;
5958
5959 ofs = build_decl (BUILTINS_LOCATION,
5960 FIELD_DECL, get_identifier ("__offset"),
5961 integer_type_node);
5962 DECL_FIELD_CONTEXT (ofs) = record;
5963 DECL_CHAIN (ofs) = space;
5964 /* ??? This is a hack, __offset is marked volatile to prevent
5965 DCE that confuses stdarg optimization and results in
5966 gcc.c-torture/execute/stdarg-1.c failure. See PR 41089. */
5967 TREE_THIS_VOLATILE (ofs) = 1;
5968
5969 base = build_decl (BUILTINS_LOCATION,
5970 FIELD_DECL, get_identifier ("__base"),
5971 ptr_type_node);
5972 DECL_FIELD_CONTEXT (base) = record;
5973 DECL_CHAIN (base) = ofs;
5974
5975 TYPE_FIELDS (record) = base;
5976 layout_type (record);
5977
5978 va_list_gpr_counter_field = ofs;
5979 return record;
5980 }
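/* A rough sketch (for illustration only; the real type is built above): on
   OSF the va_list behaves like

     struct { char *__base; int __offset; int pad; }

   where __offset is a byte offset measured from __base and "pad" stands for
   the unnamed dummy field that prevents alignment warnings.  */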
5981
5982 #if TARGET_ABI_OSF
5983 /* Helper function for alpha_stdarg_optimize_hook. Skip over casts
5984 and constant additions. */
5985
5986 static gimple
5987 va_list_skip_additions (tree lhs)
5988 {
5989 gimple stmt;
5990
5991 for (;;)
5992 {
5993 enum tree_code code;
5994
5995 stmt = SSA_NAME_DEF_STMT (lhs);
5996
5997 if (gimple_code (stmt) == GIMPLE_PHI)
5998 return stmt;
5999
6000 if (!is_gimple_assign (stmt)
6001 || gimple_assign_lhs (stmt) != lhs)
6002 return NULL;
6003
6004 if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
6005 return stmt;
6006 code = gimple_assign_rhs_code (stmt);
6007 if (!CONVERT_EXPR_CODE_P (code)
6008 && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
6009 || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
6010 || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt))))
6011 return stmt;
6012
6013 lhs = gimple_assign_rhs1 (stmt);
6014 }
6015 }
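/* For illustration, given GIMPLE such as

     off.0_1 = ap.__offset;
     _2 = (long int) off.0_1;
     _3 = _2 + 8;

   va_list_skip_additions (_3) walks back through the constant addition and
   the cast and returns the "off.0_1 = ap.__offset" statement, whose
   right-hand side is then inspected by the caller.  (The SSA names here are
   made up for the example.)  */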
6016
6017 /* Check if LHS = RHS statement is
6018 LHS = *(ap.__base + ap.__offset + cst)
6019 or
6020 LHS = *(ap.__base
6021 + ((ap.__offset + cst <= 47)
6022 ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
6023 If the former, indicate that GPR registers are needed,
6024 if the latter, indicate that FPR registers are needed.
6025
6026 Also look for LHS = (*ptr).field, where ptr is one of the forms
6027 listed above.
6028
6029 On alpha, cfun->va_list_gpr_size is used as size of the needed
6030 regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
6031 registers are needed and bit 1 set if FPR registers are needed.
6032 Return true if va_list references should not be scanned for the
6033 current statement. */
6034
6035 static bool
6036 alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt)
6037 {
6038 tree base, offset, rhs;
6039 int offset_arg = 1;
6040 gimple base_stmt;
6041
6042 if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
6043 != GIMPLE_SINGLE_RHS)
6044 return false;
6045
6046 rhs = gimple_assign_rhs1 (stmt);
6047 while (handled_component_p (rhs))
6048 rhs = TREE_OPERAND (rhs, 0);
6049 if (TREE_CODE (rhs) != MEM_REF
6050 || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
6051 return false;
6052
6053 stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
6054 if (stmt == NULL
6055 || !is_gimple_assign (stmt)
6056 || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
6057 return false;
6058
6059 base = gimple_assign_rhs1 (stmt);
6060 if (TREE_CODE (base) == SSA_NAME)
6061 {
6062 base_stmt = va_list_skip_additions (base);
6063 if (base_stmt
6064 && is_gimple_assign (base_stmt)
6065 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
6066 base = gimple_assign_rhs1 (base_stmt);
6067 }
6068
6069 if (TREE_CODE (base) != COMPONENT_REF
6070 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
6071 {
6072 base = gimple_assign_rhs2 (stmt);
6073 if (TREE_CODE (base) == SSA_NAME)
6074 {
6075 base_stmt = va_list_skip_additions (base);
6076 if (base_stmt
6077 && is_gimple_assign (base_stmt)
6078 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
6079 base = gimple_assign_rhs1 (base_stmt);
6080 }
6081
6082 if (TREE_CODE (base) != COMPONENT_REF
6083 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
6084 return false;
6085
6086 offset_arg = 0;
6087 }
6088
6089 base = get_base_address (base);
6090 if (TREE_CODE (base) != VAR_DECL
6091 || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names))
6092 return false;
6093
6094 offset = gimple_op (stmt, 1 + offset_arg);
6095 if (TREE_CODE (offset) == SSA_NAME)
6096 {
6097 gimple offset_stmt = va_list_skip_additions (offset);
6098
6099 if (offset_stmt
6100 && gimple_code (offset_stmt) == GIMPLE_PHI)
6101 {
6102 HOST_WIDE_INT sub;
6103 gimple arg1_stmt, arg2_stmt;
6104 tree arg1, arg2;
6105 enum tree_code code1, code2;
6106
6107 if (gimple_phi_num_args (offset_stmt) != 2)
6108 goto escapes;
6109
6110 arg1_stmt
6111 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
6112 arg2_stmt
6113 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
6114 if (arg1_stmt == NULL
6115 || !is_gimple_assign (arg1_stmt)
6116 || arg2_stmt == NULL
6117 || !is_gimple_assign (arg2_stmt))
6118 goto escapes;
6119
6120 code1 = gimple_assign_rhs_code (arg1_stmt);
6121 code2 = gimple_assign_rhs_code (arg2_stmt);
6122 if (code1 == COMPONENT_REF
6123 && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
6124 /* Do nothing. */;
6125 else if (code2 == COMPONENT_REF
6126 && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
6127 {
6128 gimple tem = arg1_stmt;
6129 code2 = code1;
6130 arg1_stmt = arg2_stmt;
6131 arg2_stmt = tem;
6132 }
6133 else
6134 goto escapes;
6135
6136 if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt)))
6137 goto escapes;
6138
6139 sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt));
6140 if (code2 == MINUS_EXPR)
6141 sub = -sub;
6142 if (sub < -48 || sub > -32)
6143 goto escapes;
6144
6145 arg1 = gimple_assign_rhs1 (arg1_stmt);
6146 arg2 = gimple_assign_rhs1 (arg2_stmt);
6147 if (TREE_CODE (arg2) == SSA_NAME)
6148 {
6149 arg2_stmt = va_list_skip_additions (arg2);
6150 if (arg2_stmt == NULL
6151 || !is_gimple_assign (arg2_stmt)
6152 || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
6153 goto escapes;
6154 arg2 = gimple_assign_rhs1 (arg2_stmt);
6155 }
6156 if (arg1 != arg2)
6157 goto escapes;
6158
6159 if (TREE_CODE (arg1) != COMPONENT_REF
6160 || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
6161 || get_base_address (arg1) != base)
6162 goto escapes;
6163
6164 /* Need floating point regs. */
6165 cfun->va_list_fpr_size |= 2;
6166 return false;
6167 }
6168 if (offset_stmt
6169 && is_gimple_assign (offset_stmt)
6170 && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
6171 offset = gimple_assign_rhs1 (offset_stmt);
6172 }
6173 if (TREE_CODE (offset) != COMPONENT_REF
6174 || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
6175 || get_base_address (offset) != base)
6176 goto escapes;
6177 else
6178 /* Need general regs. */
6179 cfun->va_list_fpr_size |= 1;
6180 return false;
6181
6182 escapes:
6183 si->va_list_escapes = true;
6184 return false;
6185 }
6186 #endif
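/* For illustration (a source-level view of the patterns recognized above,
   under the OSF va_list layout): a use such as

     x = va_arg (ap, long);    -- reads *(__base + __offset + ...)

   marks bit 0 (integer registers needed), while

     d = va_arg (ap, double);  -- reads *(__base + (__offset < 48
                                  ? __offset - 48 : __offset) + ...)

   marks bit 1 (floating-point registers needed) in cfun->va_list_fpr_size.  */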
6187
6188 /* Perform any actions needed for a function that is receiving a
6189 variable number of arguments. */
6190
6191 static void
6192 alpha_setup_incoming_varargs (cumulative_args_t pcum, enum machine_mode mode,
6193 tree type, int *pretend_size, int no_rtl)
6194 {
6195 CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);
6196
6197 /* Skip the current argument. */
6198 targetm.calls.function_arg_advance (pack_cumulative_args (&cum), mode, type,
6199 true);
6200
6201 #if TARGET_ABI_OPEN_VMS
6202 /* For VMS, we allocate space for all 6 arg registers plus a count.
6203
6204 However, if NO registers need to be saved, don't allocate any space.
6205 This is not only because we won't need the space, but because AP
6206 includes the current_pretend_args_size and we don't want to mess up
6207 any ap-relative addresses already made. */
6208 if (cum.num_args < 6)
6209 {
6210 if (!no_rtl)
6211 {
6212 emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6213 emit_insn (gen_arg_home ());
6214 }
6215 *pretend_size = 7 * UNITS_PER_WORD;
6216 }
6217 #else
6218 /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6219 only push those that are remaining. However, if NO registers need to
6220 be saved, don't allocate any space. This is not only because we won't
6221 need the space, but because AP includes the current_pretend_args_size
6222 and we don't want to mess up any ap-relative addresses already made.
6223
6224 If we are not to use the floating-point registers, save the integer
6225 registers where we would put the floating-point registers. This is
6226 not the most efficient way to implement varargs with just one register
6227 class, but it isn't worth doing anything more efficient in this rare
6228 case. */
6229 if (cum >= 6)
6230 return;
6231
6232 if (!no_rtl)
6233 {
6234 int count;
6235 alias_set_type set = get_varargs_alias_set ();
6236 rtx tmp;
6237
6238 count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6239 if (count > 6 - cum)
6240 count = 6 - cum;
6241
6242 /* Detect whether integer registers or floating-point registers
6243 are needed by the detected va_arg statements. See above for
6244 how these values are computed. Note that the "escape" value
6245 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
6246 these bits set. */
6247 gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6248
6249 if (cfun->va_list_fpr_size & 1)
6250 {
6251 tmp = gen_rtx_MEM (BLKmode,
6252 plus_constant (Pmode, virtual_incoming_args_rtx,
6253 (cum + 6) * UNITS_PER_WORD));
6254 MEM_NOTRAP_P (tmp) = 1;
6255 set_mem_alias_set (tmp, set);
6256 move_block_from_reg (16 + cum, tmp, count);
6257 }
6258
6259 if (cfun->va_list_fpr_size & 2)
6260 {
6261 tmp = gen_rtx_MEM (BLKmode,
6262 plus_constant (Pmode, virtual_incoming_args_rtx,
6263 cum * UNITS_PER_WORD));
6264 MEM_NOTRAP_P (tmp) = 1;
6265 set_mem_alias_set (tmp, set);
6266 move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6267 }
6268 }
6269 *pretend_size = 12 * UNITS_PER_WORD;
6270 #endif
6271 }
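/* For illustration (OSF case): for "int f (int a, ...)" the named argument
   leaves cum == 1, so up to five FP argument registers ($f17-$f21) are
   dumped at offsets 8..47 of the 96-byte block and up to five integer
   argument registers ($17-$21) at offsets 56..95, depending on which of the
   two bits computed above are set; pretend_size is set to the full
   12 * UNITS_PER_WORD whenever any register arguments remain.  */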
6272
6273 static void
6274 alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6275 {
6276 HOST_WIDE_INT offset;
6277 tree t, offset_field, base_field;
6278
6279 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6280 return;
6281
6282 /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6283 up by 48, storing fp arg registers in the first 48 bytes, and the
6284 integer arg registers in the next 48 bytes. This is only done,
6285 however, if any integer registers need to be stored.
6286
6287 If no integer registers need be stored, then we must subtract 48
6288 in order to account for the integer arg registers which are counted
6289 in argsize above, but which are not actually stored on the stack.
6290 Must further be careful here about structures straddling the last
6291 integer argument register; that futzes with pretend_args_size,
6292 which changes the meaning of AP. */
6293
6294 if (NUM_ARGS < 6)
6295 offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6296 else
6297 offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;
6298
6299 if (TARGET_ABI_OPEN_VMS)
6300 {
6301 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6302 t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
6303 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6304 TREE_SIDE_EFFECTS (t) = 1;
6305 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6306 }
6307 else
6308 {
6309 base_field = TYPE_FIELDS (TREE_TYPE (valist));
6310 offset_field = DECL_CHAIN (base_field);
6311
6312 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6313 valist, base_field, NULL_TREE);
6314 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6315 valist, offset_field, NULL_TREE);
6316
6317 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6318 t = fold_build_pointer_plus_hwi (t, offset);
6319 t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
6320 TREE_SIDE_EFFECTS (t) = 1;
6321 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6322
6323 t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6324 t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
6325 TREE_SIDE_EFFECTS (t) = 1;
6326 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6327 }
6328 }
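/* For illustration (OSF, one named argument, so NUM_ARGS == 1 and fewer
   than 6 arguments are named): the code above expands to, in effect,

     ap.__base = (char *) AP + 48;
     ap.__offset = 8;

   where AP is the incoming virtual argument pointer.  */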
6329
6330 static tree
6331 alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
6332 gimple_seq *pre_p)
6333 {
6334 tree type_size, ptr_type, addend, t, addr;
6335 gimple_seq internal_post;
6336
6337 /* If the type could not be passed in registers, skip the block
6338 reserved for the registers. */
6339 if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
6340 {
6341 t = build_int_cst (TREE_TYPE (offset), 6*8);
6342 gimplify_assign (offset,
6343 build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
6344 pre_p);
6345 }
6346
6347 addend = offset;
6348 ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);
6349
6350 if (TREE_CODE (type) == COMPLEX_TYPE)
6351 {
6352 tree real_part, imag_part, real_temp;
6353
6354 real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6355 offset, pre_p);
6356
6357 /* Copy the value into a new temporary, lest the formal temporary
6358 be reused out from under us. */
6359 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6360
6361 imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6362 offset, pre_p);
6363
6364 return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6365 }
6366 else if (TREE_CODE (type) == REAL_TYPE)
6367 {
6368 tree fpaddend, cond, fourtyeight;
6369
6370 fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6371 fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
6372 addend, fourtyeight);
6373 cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
6374 addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
6375 fpaddend, addend);
6376 }
6377
6378 /* Build the final address and force that value into a temporary. */
6379 addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
6380 internal_post = NULL;
6381 gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6382 gimple_seq_add_seq (pre_p, internal_post);
6383
6384 /* Update the offset field. */
6385 type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6386 if (type_size == NULL || TREE_OVERFLOW (type_size))
6387 t = size_zero_node;
6388 else
6389 {
6390 t = size_binop (PLUS_EXPR, type_size, size_int (7));
6391 t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6392 t = size_binop (MULT_EXPR, t, size_int (8));
6393 }
6394 t = fold_convert (TREE_TYPE (offset), t);
6395 gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
6396 pre_p);
6397
6398 return build_va_arg_indirect_ref (addr);
6399 }
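/* For illustration: for va_arg (ap, double) the sequence built above is
   roughly equivalent to

     addr = ap.__base + (ap.__offset < 48
                         ? ap.__offset - 48 : ap.__offset);
     ap.__offset += 8;
     result = *(double *) addr;

   with the fetched size always rounded up to a multiple of 8 bytes, so e.g.
   a 12-byte type advances __offset by 16.  */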
6400
6401 static tree
6402 alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6403 gimple_seq *post_p)
6404 {
6405 tree offset_field, base_field, offset, base, t, r;
6406 bool indirect;
6407
6408 if (TARGET_ABI_OPEN_VMS)
6409 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6410
6411 base_field = TYPE_FIELDS (va_list_type_node);
6412 offset_field = DECL_CHAIN (base_field);
6413 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6414 valist, base_field, NULL_TREE);
6415 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6416 valist, offset_field, NULL_TREE);
6417
6418 /* Pull the fields of the structure out into temporaries. Since we never
6419 modify the base field, we can use a formal temporary. Sign-extend the
6420 offset field so that it's the proper width for pointer arithmetic. */
6421 base = get_formal_tmp_var (base_field, pre_p);
6422
6423 t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
6424 offset = get_initialized_tmp_var (t, pre_p, NULL);
6425
6426 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6427 if (indirect)
6428 type = build_pointer_type_for_mode (type, ptr_mode, true);
6429
6430 /* Find the value. Note that this will be a stable indirection, or
6431 a composite of stable indirections in the case of complex. */
6432 r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6433
6434 /* Stuff the offset temporary back into its field. */
6435 gimplify_assign (unshare_expr (offset_field),
6436 fold_convert (TREE_TYPE (offset_field), offset), pre_p);
6437
6438 if (indirect)
6439 r = build_va_arg_indirect_ref (r);
6440
6441 return r;
6442 }
6443 \f
6444 /* Builtins. */
6445
6446 enum alpha_builtin
6447 {
6448 ALPHA_BUILTIN_CMPBGE,
6449 ALPHA_BUILTIN_EXTBL,
6450 ALPHA_BUILTIN_EXTWL,
6451 ALPHA_BUILTIN_EXTLL,
6452 ALPHA_BUILTIN_EXTQL,
6453 ALPHA_BUILTIN_EXTWH,
6454 ALPHA_BUILTIN_EXTLH,
6455 ALPHA_BUILTIN_EXTQH,
6456 ALPHA_BUILTIN_INSBL,
6457 ALPHA_BUILTIN_INSWL,
6458 ALPHA_BUILTIN_INSLL,
6459 ALPHA_BUILTIN_INSQL,
6460 ALPHA_BUILTIN_INSWH,
6461 ALPHA_BUILTIN_INSLH,
6462 ALPHA_BUILTIN_INSQH,
6463 ALPHA_BUILTIN_MSKBL,
6464 ALPHA_BUILTIN_MSKWL,
6465 ALPHA_BUILTIN_MSKLL,
6466 ALPHA_BUILTIN_MSKQL,
6467 ALPHA_BUILTIN_MSKWH,
6468 ALPHA_BUILTIN_MSKLH,
6469 ALPHA_BUILTIN_MSKQH,
6470 ALPHA_BUILTIN_UMULH,
6471 ALPHA_BUILTIN_ZAP,
6472 ALPHA_BUILTIN_ZAPNOT,
6473 ALPHA_BUILTIN_AMASK,
6474 ALPHA_BUILTIN_IMPLVER,
6475 ALPHA_BUILTIN_RPCC,
6476 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6477 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
6478
6479 /* TARGET_MAX */
6480 ALPHA_BUILTIN_MINUB8,
6481 ALPHA_BUILTIN_MINSB8,
6482 ALPHA_BUILTIN_MINUW4,
6483 ALPHA_BUILTIN_MINSW4,
6484 ALPHA_BUILTIN_MAXUB8,
6485 ALPHA_BUILTIN_MAXSB8,
6486 ALPHA_BUILTIN_MAXUW4,
6487 ALPHA_BUILTIN_MAXSW4,
6488 ALPHA_BUILTIN_PERR,
6489 ALPHA_BUILTIN_PKLB,
6490 ALPHA_BUILTIN_PKWB,
6491 ALPHA_BUILTIN_UNPKBL,
6492 ALPHA_BUILTIN_UNPKBW,
6493
6494 /* TARGET_CIX */
6495 ALPHA_BUILTIN_CTTZ,
6496 ALPHA_BUILTIN_CTLZ,
6497 ALPHA_BUILTIN_CTPOP,
6498
6499 ALPHA_BUILTIN_max
6500 };
6501
6502 static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
6503 CODE_FOR_builtin_cmpbge,
6504 CODE_FOR_extbl,
6505 CODE_FOR_extwl,
6506 CODE_FOR_extll,
6507 CODE_FOR_extql,
6508 CODE_FOR_extwh,
6509 CODE_FOR_extlh,
6510 CODE_FOR_extqh,
6511 CODE_FOR_builtin_insbl,
6512 CODE_FOR_builtin_inswl,
6513 CODE_FOR_builtin_insll,
6514 CODE_FOR_insql,
6515 CODE_FOR_inswh,
6516 CODE_FOR_inslh,
6517 CODE_FOR_insqh,
6518 CODE_FOR_mskbl,
6519 CODE_FOR_mskwl,
6520 CODE_FOR_mskll,
6521 CODE_FOR_mskql,
6522 CODE_FOR_mskwh,
6523 CODE_FOR_msklh,
6524 CODE_FOR_mskqh,
6525 CODE_FOR_umuldi3_highpart,
6526 CODE_FOR_builtin_zap,
6527 CODE_FOR_builtin_zapnot,
6528 CODE_FOR_builtin_amask,
6529 CODE_FOR_builtin_implver,
6530 CODE_FOR_builtin_rpcc,
6531 CODE_FOR_builtin_establish_vms_condition_handler,
6532 CODE_FOR_builtin_revert_vms_condition_handler,
6533
6534 /* TARGET_MAX */
6535 CODE_FOR_builtin_minub8,
6536 CODE_FOR_builtin_minsb8,
6537 CODE_FOR_builtin_minuw4,
6538 CODE_FOR_builtin_minsw4,
6539 CODE_FOR_builtin_maxub8,
6540 CODE_FOR_builtin_maxsb8,
6541 CODE_FOR_builtin_maxuw4,
6542 CODE_FOR_builtin_maxsw4,
6543 CODE_FOR_builtin_perr,
6544 CODE_FOR_builtin_pklb,
6545 CODE_FOR_builtin_pkwb,
6546 CODE_FOR_builtin_unpkbl,
6547 CODE_FOR_builtin_unpkbw,
6548
6549 /* TARGET_CIX */
6550 CODE_FOR_ctzdi2,
6551 CODE_FOR_clzdi2,
6552 CODE_FOR_popcountdi2
6553 };
6554
6555 struct alpha_builtin_def
6556 {
6557 const char *name;
6558 enum alpha_builtin code;
6559 unsigned int target_mask;
6560 bool is_const;
6561 };
6562
6563 static struct alpha_builtin_def const zero_arg_builtins[] = {
6564 { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true },
6565 { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false }
6566 };
6567
6568 static struct alpha_builtin_def const one_arg_builtins[] = {
6569 { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true },
6570 { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true },
6571 { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true },
6572 { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true },
6573 { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true },
6574 { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true },
6575 { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true },
6576 { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true }
6577 };
6578
6579 static struct alpha_builtin_def const two_arg_builtins[] = {
6580 { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true },
6581 { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true },
6582 { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true },
6583 { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true },
6584 { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true },
6585 { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true },
6586 { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true },
6587 { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true },
6588 { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true },
6589 { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true },
6590 { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true },
6591 { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true },
6592 { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true },
6593 { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true },
6594 { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true },
6595 { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true },
6596 { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true },
6597 { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true },
6598 { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true },
6599 { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true },
6600 { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true },
6601 { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true },
6602 { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true },
6603 { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true },
6604 { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true },
6605 { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true },
6606 { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true },
6607 { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true },
6608 { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true },
6609 { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true },
6610 { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true },
6611 { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true },
6612 { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true },
6613 { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true }
6614 };
6615
6616 static GTY(()) tree alpha_dimode_u;
6617 static GTY(()) tree alpha_v8qi_u;
6618 static GTY(()) tree alpha_v8qi_s;
6619 static GTY(()) tree alpha_v4hi_u;
6620 static GTY(()) tree alpha_v4hi_s;
6621
6622 static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];
6623
6624 /* Return the alpha builtin for CODE. */
6625
6626 static tree
6627 alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6628 {
6629 if (code >= ALPHA_BUILTIN_max)
6630 return error_mark_node;
6631 return alpha_builtins[code];
6632 }
6633
6634 /* Helper function of alpha_init_builtins. Add the built-in specified
6635 by NAME, TYPE, CODE, and ECF. */
6636
6637 static void
6638 alpha_builtin_function (const char *name, tree ftype,
6639 enum alpha_builtin code, unsigned ecf)
6640 {
6641 tree decl = add_builtin_function (name, ftype, (int) code,
6642 BUILT_IN_MD, NULL, NULL_TREE);
6643
6644 if (ecf & ECF_CONST)
6645 TREE_READONLY (decl) = 1;
6646 if (ecf & ECF_NOTHROW)
6647 TREE_NOTHROW (decl) = 1;
6648
6649 alpha_builtins [(int) code] = decl;
6650 }
6651
6652 /* Helper function of alpha_init_builtins. Add the COUNT built-in
6653 functions pointed to by P, with function type FTYPE. */
6654
6655 static void
6656 alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
6657 tree ftype)
6658 {
6659 size_t i;
6660
6661 for (i = 0; i < count; ++i, ++p)
6662 if ((target_flags & p->target_mask) == p->target_mask)
6663 alpha_builtin_function (p->name, ftype, p->code,
6664 (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
6665 }
6666
6667 static void
6668 alpha_init_builtins (void)
6669 {
6670 tree ftype;
6671
6672 alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
6673 alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6674 alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6675 alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6676 alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6677
6678 ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
6679 alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);
6680
6681 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
6682 alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);
6683
6684 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
6685 alpha_dimode_u, NULL_TREE);
6686 alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);
6687
6688 if (TARGET_ABI_OPEN_VMS)
6689 {
6690 ftype = build_function_type_list (ptr_type_node, ptr_type_node,
6691 NULL_TREE);
6692 alpha_builtin_function ("__builtin_establish_vms_condition_handler",
6693 ftype,
6694 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6695 0);
6696
6697 ftype = build_function_type_list (ptr_type_node, void_type_node,
6698 NULL_TREE);
6699 alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
6700 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);
6701
6702 vms_patch_builtins ();
6703 }
6704 }
6705
6706 /* Expand an expression EXP that calls a built-in function,
6707 with result going to TARGET if that's convenient
6708 (and in mode MODE if that's convenient).
6709 SUBTARGET may be used as the target for computing one of EXP's operands.
6710 IGNORE is nonzero if the value is to be ignored. */
6711
6712 static rtx
6713 alpha_expand_builtin (tree exp, rtx target,
6714 rtx subtarget ATTRIBUTE_UNUSED,
6715 enum machine_mode mode ATTRIBUTE_UNUSED,
6716 int ignore ATTRIBUTE_UNUSED)
6717 {
6718 #define MAX_ARGS 2
6719
6720 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6721 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6722 tree arg;
6723 call_expr_arg_iterator iter;
6724 enum insn_code icode;
6725 rtx op[MAX_ARGS], pat;
6726 int arity;
6727 bool nonvoid;
6728
6729 if (fcode >= ALPHA_BUILTIN_max)
6730 internal_error ("bad builtin fcode");
6731 icode = code_for_builtin[fcode];
6732 if (icode == 0)
6733 internal_error ("bad builtin fcode");
6734
6735 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6736
6737 arity = 0;
6738 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
6739 {
6740 const struct insn_operand_data *insn_op;
6741
6742 if (arg == error_mark_node)
6743 return NULL_RTX;
6744 if (arity >= MAX_ARGS)
6745 return NULL_RTX;
6746
6747 insn_op = &insn_data[icode].operand[arity + nonvoid];
6748
6749 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
6750
6751 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6752 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6753 arity++;
6754 }
6755
6756 if (nonvoid)
6757 {
6758 enum machine_mode tmode = insn_data[icode].operand[0].mode;
6759 if (!target
6760 || GET_MODE (target) != tmode
6761 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6762 target = gen_reg_rtx (tmode);
6763 }
6764
6765 switch (arity)
6766 {
6767 case 0:
6768 pat = GEN_FCN (icode) (target);
6769 break;
6770 case 1:
6771 if (nonvoid)
6772 pat = GEN_FCN (icode) (target, op[0]);
6773 else
6774 pat = GEN_FCN (icode) (op[0]);
6775 break;
6776 case 2:
6777 pat = GEN_FCN (icode) (target, op[0], op[1]);
6778 break;
6779 default:
6780 gcc_unreachable ();
6781 }
6782 if (!pat)
6783 return NULL_RTX;
6784 emit_insn (pat);
6785
6786 if (nonvoid)
6787 return target;
6788 else
6789 return const0_rtx;
6790 }
6791
6792
6793 /* Several bits below assume HWI >= 64 bits. This should be enforced
6794 by config.gcc. */
6795 #if HOST_BITS_PER_WIDE_INT < 64
6796 # error "HOST_WIDE_INT too small"
6797 #endif
6798
6799 /* Fold the builtin for the CMPBGE instruction. This is a vector comparison
6800 with an 8-bit output vector. OPINT contains the integer operands; bit N
6801 of OP_CONST is set if OPINT[N] is valid. */
6802
6803 static tree
6804 alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6805 {
6806 if (op_const == 3)
6807 {
6808 int i, val;
6809 for (i = 0, val = 0; i < 8; ++i)
6810 {
6811 unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6812 unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6813 if (c0 >= c1)
6814 val |= 1 << i;
6815 }
6816 return build_int_cst (alpha_dimode_u, val);
6817 }
6818 else if (op_const == 2 && opint[1] == 0)
6819 return build_int_cst (alpha_dimode_u, 0xff);
6820 return NULL;
6821 }
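/* For illustration: with both operands constant the fold above gives

     __builtin_alpha_cmpbge (0x0102030405060708, 0x0807060504030201) == 0x0f

   since only the four low-order bytes of the first operand are >= the
   corresponding bytes of the second; and for any x,
   __builtin_alpha_cmpbge (x, 0) == 0xff.  */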
6822
6823 /* Fold the builtin for the ZAPNOT instruction. This is essentially a
6824 specialized form of an AND operation. Other byte manipulation instructions
6825 are defined in terms of this instruction, so this is also used as a
6826 subroutine for other builtins.
6827
6828 OP contains the tree operands; OPINT contains the extracted integer values.
6829 Bit N of OP_CONST is set if OPINT[N] is valid. OP may be null if only
6830 OPINT need be considered. */
6831
6832 static tree
6833 alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6834 long op_const)
6835 {
6836 if (op_const & 2)
6837 {
6838 unsigned HOST_WIDE_INT mask = 0;
6839 int i;
6840
6841 for (i = 0; i < 8; ++i)
6842 if ((opint[1] >> i) & 1)
6843 mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6844
6845 if (op_const & 1)
6846 return build_int_cst (alpha_dimode_u, opint[0] & mask);
6847
6848 if (op)
6849 return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0],
6850 build_int_cst (alpha_dimode_u, mask));
6851 }
6852 else if ((op_const & 1) && opint[0] == 0)
6853 return build_int_cst (alpha_dimode_u, 0);
6854 return NULL;
6855 }
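/* For illustration: ZAPNOT keeps the bytes selected by the low 8 bits of the
   second operand and clears the rest, so the fold above gives

     __builtin_alpha_zapnot (0x1122334455667788, 0x0f) == 0x55667788

   and ZAP is the same operation with the byte mask complemented (see
   ALPHA_BUILTIN_ZAP in alpha_fold_builtin below).  */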
6856
6857 /* Fold the builtins for the EXT family of instructions. */
6858
6859 static tree
6860 alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6861 long op_const, unsigned HOST_WIDE_INT bytemask,
6862 bool is_high)
6863 {
6864 long zap_const = 2;
6865 tree *zap_op = NULL;
6866
6867 if (op_const & 2)
6868 {
6869 unsigned HOST_WIDE_INT loc;
6870
6871 loc = opint[1] & 7;
6872 loc *= BITS_PER_UNIT;
6873
6874 if (loc != 0)
6875 {
6876 if (op_const & 1)
6877 {
6878 unsigned HOST_WIDE_INT temp = opint[0];
6879 if (is_high)
6880 temp <<= loc;
6881 else
6882 temp >>= loc;
6883 opint[0] = temp;
6884 zap_const = 3;
6885 }
6886 }
6887 else
6888 zap_op = op;
6889 }
6890
6891 opint[1] = bytemask;
6892 return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
6893 }
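/* For illustration: EXTBL extracts the byte selected by the low three bits
   of the second operand, so the fold above gives

     __builtin_alpha_extbl (0x1122334455667788, 6) == 0x22

   (shift right by 48 bits, then ZAPNOT with byte mask 0x01).  */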
6894
6895 /* Fold the builtins for the INS family of instructions. */
6896
6897 static tree
6898 alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6899 long op_const, unsigned HOST_WIDE_INT bytemask,
6900 bool is_high)
6901 {
6902 if ((op_const & 1) && opint[0] == 0)
6903 return build_int_cst (alpha_dimode_u, 0);
6904
6905 if (op_const & 2)
6906 {
6907 unsigned HOST_WIDE_INT temp, loc, byteloc;
6908 tree *zap_op = NULL;
6909
6910 loc = opint[1] & 7;
6911 bytemask <<= loc;
6912
6913 temp = opint[0];
6914 if (is_high)
6915 {
6916 byteloc = (64 - (loc * 8)) & 0x3f;
6917 if (byteloc == 0)
6918 zap_op = op;
6919 else
6920 temp >>= byteloc;
6921 bytemask >>= 8;
6922 }
6923 else
6924 {
6925 byteloc = loc * 8;
6926 if (byteloc == 0)
6927 zap_op = op;
6928 else
6929 temp <<= byteloc;
6930 }
6931
6932 opint[0] = temp;
6933 opint[1] = bytemask;
6934 return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6935 }
6936
6937 return NULL;
6938 }
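/* For illustration: INSBL places the low byte of the first operand at the
   byte position selected by the second, so the fold above gives

     __builtin_alpha_insbl (0xab, 3) == 0x00000000ab000000

   (shift left by 24 bits, then ZAPNOT with byte mask 0x08).  */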
6939
6940 static tree
6941 alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6942 long op_const, unsigned HOST_WIDE_INT bytemask,
6943 bool is_high)
6944 {
6945 if (op_const & 2)
6946 {
6947 unsigned HOST_WIDE_INT loc;
6948
6949 loc = opint[1] & 7;
6950 bytemask <<= loc;
6951
6952 if (is_high)
6953 bytemask >>= 8;
6954
6955 opint[1] = bytemask ^ 0xff;
6956 }
6957
6958 return alpha_fold_builtin_zapnot (op, opint, op_const);
6959 }
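/* For illustration: MSKBL clears the selected byte, so the fold above gives

     __builtin_alpha_mskbl (0x1122334455667788, 0) == 0x1122334455667700

   (ZAPNOT with byte mask 0x01 ^ 0xff == 0xfe).  */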
6960
6961 static tree
6962 alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6963 {
6964 tree op0 = fold_convert (vtype, op[0]);
6965 tree op1 = fold_convert (vtype, op[1]);
6966 tree val = fold_build2 (code, vtype, op0, op1);
6967 return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val);
6968 }
6969
6970 static tree
6971 alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6972 {
6973 unsigned HOST_WIDE_INT temp = 0;
6974 int i;
6975
6976 if (op_const != 3)
6977 return NULL;
6978
6979 for (i = 0; i < 8; ++i)
6980 {
6981 unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6982 unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6983 if (a >= b)
6984 temp += a - b;
6985 else
6986 temp += b - a;
6987 }
6988
6989 return build_int_cst (alpha_dimode_u, temp);
6990 }
6991
6992 static tree
6993 alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6994 {
6995 unsigned HOST_WIDE_INT temp;
6996
6997 if (op_const == 0)
6998 return NULL;
6999
7000 temp = opint[0] & 0xff;
7001 temp |= (opint[0] >> 24) & 0xff00;
7002
7003 return build_int_cst (alpha_dimode_u, temp);
7004 }
7005
7006 static tree
7007 alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
7008 {
7009 unsigned HOST_WIDE_INT temp;
7010
7011 if (op_const == 0)
7012 return NULL;
7013
7014 temp = opint[0] & 0xff;
7015 temp |= (opint[0] >> 8) & 0xff00;
7016 temp |= (opint[0] >> 16) & 0xff0000;
7017 temp |= (opint[0] >> 24) & 0xff000000;
7018
7019 return build_int_cst (alpha_dimode_u, temp);
7020 }
7021
7022 static tree
7023 alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
7024 {
7025 unsigned HOST_WIDE_INT temp;
7026
7027 if (op_const == 0)
7028 return NULL;
7029
7030 temp = opint[0] & 0xff;
7031 temp |= (opint[0] & 0xff00) << 24;
7032
7033 return build_int_cst (alpha_dimode_u, temp);
7034 }
7035
7036 static tree
7037 alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
7038 {
7039 unsigned HOST_WIDE_INT temp;
7040
7041 if (op_const == 0)
7042 return NULL;
7043
7044 temp = opint[0] & 0xff;
7045 temp |= (opint[0] & 0x0000ff00) << 8;
7046 temp |= (opint[0] & 0x00ff0000) << 16;
7047 temp |= (opint[0] & 0xff000000) << 24;
7048
7049 return build_int_cst (alpha_dimode_u, temp);
7050 }
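/* For illustration: PKLB packs bytes 0 and 4 into bytes 0 and 1, and UNPKBL
   is its inverse, so the folds above give

     __builtin_alpha_pklb (0x0000004400000033) == 0x4433
     __builtin_alpha_unpkbl (0x4433) == 0x0000004400000033

   PKWB and UNPKBW do the same across four byte positions.  */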
7051
7052 static tree
7053 alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
7054 {
7055 unsigned HOST_WIDE_INT temp;
7056
7057 if (op_const == 0)
7058 return NULL;
7059
7060 if (opint[0] == 0)
7061 temp = 64;
7062 else
7063 temp = exact_log2 (opint[0] & -opint[0]);
7064
7065 return build_int_cst (alpha_dimode_u, temp);
7066 }
7067
7068 static tree
7069 alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
7070 {
7071 unsigned HOST_WIDE_INT temp;
7072
7073 if (op_const == 0)
7074 return NULL;
7075
7076 if (opint[0] == 0)
7077 temp = 64;
7078 else
7079 temp = 64 - floor_log2 (opint[0]) - 1;
7080
7081 return build_int_cst (alpha_dimode_u, temp);
7082 }
7083
7084 static tree
7085 alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
7086 {
7087 unsigned HOST_WIDE_INT temp, op;
7088
7089 if (op_const == 0)
7090 return NULL;
7091
7092 op = opint[0];
7093 temp = 0;
7094 while (op)
7095 temp++, op &= op - 1;
7096
7097 return build_int_cst (alpha_dimode_u, temp);
7098 }
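/* For illustration: the folds above give

     __builtin_alpha_cttz (0x48) == 3     (index of the lowest set bit)
     __builtin_alpha_ctlz (1) == 63       (leading zero count)
     __builtin_alpha_ctpop (0xff) == 8    (population count)

   with both CTTZ and CTLZ returning 64 for a zero operand.  */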
7099
7100 /* Fold one of our builtin functions. */
7101
7102 static tree
7103 alpha_fold_builtin (tree fndecl, int n_args, tree *op,
7104 bool ignore ATTRIBUTE_UNUSED)
7105 {
7106 unsigned HOST_WIDE_INT opint[MAX_ARGS];
7107 long op_const = 0;
7108 int i;
7109
7110 if (n_args > MAX_ARGS)
7111 return NULL;
7112
7113 for (i = 0; i < n_args; i++)
7114 {
7115 tree arg = op[i];
7116 if (arg == error_mark_node)
7117 return NULL;
7118
7119 opint[i] = 0;
7120 if (TREE_CODE (arg) == INTEGER_CST)
7121 {
7122 op_const |= 1L << i;
7123 opint[i] = int_cst_value (arg);
7124 }
7125 }
7126
7127 switch (DECL_FUNCTION_CODE (fndecl))
7128 {
7129 case ALPHA_BUILTIN_CMPBGE:
7130 return alpha_fold_builtin_cmpbge (opint, op_const);
7131
7132 case ALPHA_BUILTIN_EXTBL:
7133 return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
7134 case ALPHA_BUILTIN_EXTWL:
7135 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
7136 case ALPHA_BUILTIN_EXTLL:
7137 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
7138 case ALPHA_BUILTIN_EXTQL:
7139 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
7140 case ALPHA_BUILTIN_EXTWH:
7141 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
7142 case ALPHA_BUILTIN_EXTLH:
7143 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
7144 case ALPHA_BUILTIN_EXTQH:
7145 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
7146
7147 case ALPHA_BUILTIN_INSBL:
7148 return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
7149 case ALPHA_BUILTIN_INSWL:
7150 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
7151 case ALPHA_BUILTIN_INSLL:
7152 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
7153 case ALPHA_BUILTIN_INSQL:
7154 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
7155 case ALPHA_BUILTIN_INSWH:
7156 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
7157 case ALPHA_BUILTIN_INSLH:
7158 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
7159 case ALPHA_BUILTIN_INSQH:
7160 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
7161
7162 case ALPHA_BUILTIN_MSKBL:
7163 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
7164 case ALPHA_BUILTIN_MSKWL:
7165 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
7166 case ALPHA_BUILTIN_MSKLL:
7167 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
7168 case ALPHA_BUILTIN_MSKQL:
7169 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
7170 case ALPHA_BUILTIN_MSKWH:
7171 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
7172 case ALPHA_BUILTIN_MSKLH:
7173 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
7174 case ALPHA_BUILTIN_MSKQH:
7175 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
7176
7177 case ALPHA_BUILTIN_ZAP:
7178 opint[1] ^= 0xff;
7179 /* FALLTHRU */
7180 case ALPHA_BUILTIN_ZAPNOT:
7181 return alpha_fold_builtin_zapnot (op, opint, op_const);
7182
7183 case ALPHA_BUILTIN_MINUB8:
7184 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7185 case ALPHA_BUILTIN_MINSB8:
7186 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7187 case ALPHA_BUILTIN_MINUW4:
7188 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7189 case ALPHA_BUILTIN_MINSW4:
7190 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7191 case ALPHA_BUILTIN_MAXUB8:
7192 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7193 case ALPHA_BUILTIN_MAXSB8:
7194 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7195 case ALPHA_BUILTIN_MAXUW4:
7196 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7197 case ALPHA_BUILTIN_MAXSW4:
7198 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7199
7200 case ALPHA_BUILTIN_PERR:
7201 return alpha_fold_builtin_perr (opint, op_const);
7202 case ALPHA_BUILTIN_PKLB:
7203 return alpha_fold_builtin_pklb (opint, op_const);
7204 case ALPHA_BUILTIN_PKWB:
7205 return alpha_fold_builtin_pkwb (opint, op_const);
7206 case ALPHA_BUILTIN_UNPKBL:
7207 return alpha_fold_builtin_unpkbl (opint, op_const);
7208 case ALPHA_BUILTIN_UNPKBW:
7209 return alpha_fold_builtin_unpkbw (opint, op_const);
7210
7211 case ALPHA_BUILTIN_CTTZ:
7212 return alpha_fold_builtin_cttz (opint, op_const);
7213 case ALPHA_BUILTIN_CTLZ:
7214 return alpha_fold_builtin_ctlz (opint, op_const);
7215 case ALPHA_BUILTIN_CTPOP:
7216 return alpha_fold_builtin_ctpop (opint, op_const);
7217
7218 case ALPHA_BUILTIN_AMASK:
7219 case ALPHA_BUILTIN_IMPLVER:
7220 case ALPHA_BUILTIN_RPCC:
7221 /* None of these are foldable at compile-time. */
7222 default:
7223 return NULL;
7224 }
7225 }
7226
7227 bool
7228 alpha_gimple_fold_builtin (gimple_stmt_iterator *gsi)
7229 {
7230 bool changed = false;
7231 gimple stmt = gsi_stmt (*gsi);
7232 tree call = gimple_call_fn (stmt);
7233 gimple new_stmt = NULL;
7234
7235 if (call)
7236 {
7237 tree fndecl = gimple_call_fndecl (stmt);
7238
7239 if (fndecl)
7240 {
7241 tree arg0, arg1;
7242
7243 switch (DECL_FUNCTION_CODE (fndecl))
7244 {
7245 case ALPHA_BUILTIN_UMULH:
7246 arg0 = gimple_call_arg (stmt, 0);
7247 arg1 = gimple_call_arg (stmt, 1);
7248
7249 new_stmt
7250 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR,
7251 gimple_call_lhs (stmt),
7252 arg0,
7253 arg1);
7254 break;
7255 default:
7256 break;
7257 }
7258 }
7259 }
7260
7261 if (new_stmt)
7262 {
7263 gsi_replace (gsi, new_stmt, true);
7264 changed = true;
7265 }
7266
7267 return changed;
7268 }
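/* For illustration: the replacement above turns

     x = __builtin_alpha_umulh (a, b);

   into a MULT_HIGHPART_EXPR assignment, i.e. the high 64 bits of the full
   128-bit unsigned product, equivalent to
   (unsigned long) (((unsigned __int128) a * b) >> 64).  */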
7269 \f
7270 /* This page contains routines that are used to determine what the function
7271 prologue and epilogue code will do and write them out. */
7272
7273 /* Compute the size of the save area in the stack. */
7274
7275 /* These variables are used for communication between the following functions.
7276 They indicate various things about the current function being compiled
7277 that are used to tell what kind of prologue, epilogue and procedure
7278 descriptor to generate. */
7279
7280 /* Nonzero if we need a stack procedure. */
7281 enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7282 static enum alpha_procedure_types alpha_procedure_type;
7283
7284 /* Register number (either FP or SP) that is used to unwind the frame. */
7285 static int vms_unwind_regno;
7286
7287 /* Register number used to save FP. We need not have one for RA since
7288 we don't modify it for register procedures. This is only defined
7289 for register frame procedures. */
7290 static int vms_save_fp_regno;
7291
7292 /* Register number used to reference objects off our PV. */
7293 static int vms_base_regno;
7294
7295 /* Compute register masks for saved registers. */
7296
7297 static void
7298 alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP)
7299 {
7300 unsigned long imask = 0;
7301 unsigned long fmask = 0;
7302 unsigned int i;
7303
7304 /* When outputting a thunk, we don't have valid register life info,
7305 but assemble_start_function wants to output .frame and .mask
7306 directives. */
7307 if (cfun->is_thunk)
7308 {
7309 *imaskP = 0;
7310 *fmaskP = 0;
7311 return;
7312 }
7313
7314 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7315 imask |= (1UL << HARD_FRAME_POINTER_REGNUM);
7316
7317 /* One for every register we have to save. */
7318 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7319 if (! fixed_regs[i] && ! call_used_regs[i]
7320 && df_regs_ever_live_p (i) && i != REG_RA)
7321 {
7322 if (i < 32)
7323 imask |= (1UL << i);
7324 else
7325 fmask |= (1UL << (i - 32));
7326 }
7327
7328 /* We need to restore these for the handler. */
7329 if (crtl->calls_eh_return)
7330 {
7331 for (i = 0; ; ++i)
7332 {
7333 unsigned regno = EH_RETURN_DATA_REGNO (i);
7334 if (regno == INVALID_REGNUM)
7335 break;
7336 imask |= 1UL << regno;
7337 }
7338 }
7339
7340 /* If any register spilled, then spill the return address also. */
7341 /* ??? This is required by the Digital stack unwind specification
7342 and isn't needed if we're doing Dwarf2 unwinding. */
7343 if (imask || fmask || alpha_ra_ever_killed ())
7344 imask |= (1UL << REG_RA);
7345
7346 *imaskP = imask;
7347 *fmaskP = fmask;
7348 }
7349
7350 int
7351 alpha_sa_size (void)
7352 {
7353 unsigned long mask[2];
7354 int sa_size = 0;
7355 int i, j;
7356
7357 alpha_sa_mask (&mask[0], &mask[1]);
7358
7359 for (j = 0; j < 2; ++j)
7360 for (i = 0; i < 32; ++i)
7361 if ((mask[j] >> i) & 1)
7362 sa_size++;
7363
7364 if (TARGET_ABI_OPEN_VMS)
7365 {
7366 /* Start with a stack procedure if we make any calls (REG_RA used), or
7367 need a frame pointer, with a register procedure if we otherwise need
7368 at least a slot, and with a null procedure in other cases. */
7369 if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed)
7370 alpha_procedure_type = PT_STACK;
7371 else if (get_frame_size() != 0)
7372 alpha_procedure_type = PT_REGISTER;
7373 else
7374 alpha_procedure_type = PT_NULL;
7375
7376 /* Don't reserve space for saving FP & RA yet. Do that later after we've
7377 made the final decision on stack procedure vs register procedure. */
7378 if (alpha_procedure_type == PT_STACK)
7379 sa_size -= 2;
7380
7381 /* Decide whether to refer to objects off our PV via FP or PV.
7382 If we need FP for something else or if we receive a nonlocal
7383 goto (which expects PV to contain the value), we must use PV.
7384 Otherwise, start by assuming we can use FP. */
7385
7386 vms_base_regno
7387 = (frame_pointer_needed
7388 || cfun->has_nonlocal_label
7389 || alpha_procedure_type == PT_STACK
7390 || crtl->outgoing_args_size)
7391 ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7392
7393 /* If we want to copy PV into FP, we need to find some register
7394 in which to save FP. */
7395
7396 vms_save_fp_regno = -1;
7397 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7398 for (i = 0; i < 32; i++)
7399 if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i))
7400 vms_save_fp_regno = i;
7401
7402 /* A VMS condition handler requires a stack procedure in our
7403 implementation (this is not required by the calling standard). */
7404 if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7405 || cfun->machine->uses_condition_handler)
7406 vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7407 else if (alpha_procedure_type == PT_NULL)
7408 vms_base_regno = REG_PV;
7409
7410 /* Stack unwinding should be done via FP unless we use it for PV. */
7411 vms_unwind_regno = (vms_base_regno == REG_PV
7412 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7413
7414 /* If this is a stack procedure, allow space for saving FP, RA and
7415 a condition handler slot if needed. */
7416 if (alpha_procedure_type == PT_STACK)
7417 sa_size += 2 + cfun->machine->uses_condition_handler;
7418 }
7419 else
7420 {
7421 /* Our size must be even (multiple of 16 bytes). */
7422 if (sa_size & 1)
7423 sa_size++;
7424 }
7425
7426 return sa_size * 8;
7427 }
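/* For illustration (OSF): a function that must save $9, $10 and $f2 also
   saves $26 (the return address, added by alpha_sa_mask above), giving four
   8-byte slots; the slot count is already even, so alpha_sa_size returns
   32.  */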
7428
7429 /* Define the offset between two registers, one to be eliminated,
7430 and the other its replacement, at the start of a routine. */
7431
7432 HOST_WIDE_INT
7433 alpha_initial_elimination_offset (unsigned int from,
7434 unsigned int to ATTRIBUTE_UNUSED)
7435 {
7436 HOST_WIDE_INT ret;
7437
7438 ret = alpha_sa_size ();
7439 ret += ALPHA_ROUND (crtl->outgoing_args_size);
7440
7441 switch (from)
7442 {
7443 case FRAME_POINTER_REGNUM:
7444 break;
7445
7446 case ARG_POINTER_REGNUM:
7447 ret += (ALPHA_ROUND (get_frame_size ()
7448 + crtl->args.pretend_args_size)
7449 - crtl->args.pretend_args_size);
7450 break;
7451
7452 default:
7453 gcc_unreachable ();
7454 }
7455
7456 return ret;
7457 }
7458
7459 #if TARGET_ABI_OPEN_VMS
7460
7461 /* Worker function for TARGET_CAN_ELIMINATE. */
7462
7463 static bool
7464 alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7465 {
7466 /* We need the alpha_procedure_type to decide. Evaluate it now. */
7467 alpha_sa_size ();
7468
7469 switch (alpha_procedure_type)
7470 {
7471 case PT_NULL:
7472 /* NULL procedures have no frame of their own and we only
7473 know how to resolve from the current stack pointer. */
7474 return to == STACK_POINTER_REGNUM;
7475
7476 case PT_REGISTER:
7477 case PT_STACK:
7478 /* We always eliminate except to the stack pointer if there is no
7479 usable frame pointer at hand. */
7480 return (to != STACK_POINTER_REGNUM
7481 || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
7482 }
7483
7484 gcc_unreachable ();
7485 }
7486
7487 /* FROM is to be eliminated for TO. Return the offset so that TO+offset
7488 designates the same location as FROM. */
7489
7490 HOST_WIDE_INT
7491 alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
7492 {
7493 /* The only possible attempts we ever expect are ARG or FRAME_PTR to
7494 HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide
7495 on the proper computations and will need the register save area size
7496 in most cases. */
7497
7498 HOST_WIDE_INT sa_size = alpha_sa_size ();
7499
7500 /* PT_NULL procedures have no frame of their own and we only allow
7501 elimination to the stack pointer, which for them coincides with the
7502 argument pointer; the soft frame pointer resolves to it as well. */
7503
7504 if (alpha_procedure_type == PT_NULL)
7505 return 0;
7506
7507 /* For a PT_STACK procedure the frame layout looks as follows
7508
7509 -----> decreasing addresses
7510
7511 < size rounded up to 16 | likewise >
7512 --------------#------------------------------+++--------------+++-------#
7513 incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
7514 --------------#---------------------------------------------------------#
7515 Pointers, left to right: ARG_PTR at the first '#', FRAME_PTR between
7516 "frame" and regs sa, HARD_FRAME_PTR between PV and outgoing args, and STACK_PTR at the final '#'.
7517
7518
7519 PT_REGISTER procedures are similar in that they may have a frame of their
7520 own. They have no regs-sa/pv/outgoing-args area.
7521
7522 We first compute offset to HARD_FRAME_PTR, then add what we need to get
7523 to STACK_PTR if need be. */
7524
7525 {
7526 HOST_WIDE_INT offset;
7527 HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
7528
7529 switch (from)
7530 {
7531 case FRAME_POINTER_REGNUM:
7532 offset = ALPHA_ROUND (sa_size + pv_save_size);
7533 break;
7534 case ARG_POINTER_REGNUM:
7535 offset = (ALPHA_ROUND (sa_size + pv_save_size
7536 + get_frame_size ()
7537 + crtl->args.pretend_args_size)
7538 - crtl->args.pretend_args_size);
7539 break;
7540 default:
7541 gcc_unreachable ();
7542 }
7543
7544 if (to == STACK_POINTER_REGNUM)
7545 offset += ALPHA_ROUND (crtl->outgoing_args_size);
7546
7547 return offset;
7548 }
7549 }
7550
7551 #define COMMON_OBJECT "common_object"
7552
7553 static tree
7554 common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
7555 tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
7556 bool *no_add_attrs ATTRIBUTE_UNUSED)
7557 {
7558 tree decl = *node;
7559 gcc_assert (DECL_P (decl));
7560
7561 DECL_COMMON (decl) = 1;
7562 return NULL_TREE;
7563 }
7564
7565 static const struct attribute_spec vms_attribute_table[] =
7566 {
7567 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7568 affects_type_identity } */
7569 { COMMON_OBJECT, 0, 1, true, false, false, common_object_handler, false },
7570 { NULL, 0, 0, false, false, false, NULL, false }
7571 };
7572
7573 void
7574 vms_output_aligned_decl_common(FILE *file, tree decl, const char *name,
7575 unsigned HOST_WIDE_INT size,
7576 unsigned int align)
7577 {
7578 tree attr = DECL_ATTRIBUTES (decl);
7579 fprintf (file, "%s", COMMON_ASM_OP);
7580 assemble_name (file, name);
7581 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
7582 /* ??? Unlike on OSF/1, the alignment factor is not in log units. */
7583 fprintf (file, ",%u", align / BITS_PER_UNIT);
7584 if (attr)
7585 {
7586 attr = lookup_attribute (COMMON_OBJECT, attr);
7587 if (attr)
7588 fprintf (file, ",%s",
7589 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
7590 }
7591 fputc ('\n', file);
7592 }
7593
7594 #undef COMMON_OBJECT
7595
7596 #endif
7597
7598 static int
7599 find_lo_sum_using_gp (rtx *px, void *data ATTRIBUTE_UNUSED)
7600 {
7601 return GET_CODE (*px) == LO_SUM && XEXP (*px, 0) == pic_offset_table_rtx;
7602 }
7603
7604 int
7605 alpha_find_lo_sum_using_gp (rtx insn)
7606 {
7607 return for_each_rtx (&PATTERN (insn), find_lo_sum_using_gp, NULL) > 0;
7608 }
7609
7610 static int
7611 alpha_does_function_need_gp (void)
7612 {
7613 rtx_insn *insn;
7614
7615 /* The GP being variable is an OSF abi thing. */
7616 if (! TARGET_ABI_OSF)
7617 return 0;
7618
7619 /* We need the gp to load the address of __mcount. */
7620 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7621 return 1;
7622
7623 /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */
7624 if (cfun->is_thunk)
7625 return 1;
7626
7627 /* The nonlocal receiver pattern assumes that the gp is valid for
7628 the nested function. Reasonable because it's almost always set
7629 correctly already. For the cases where that's wrong, make sure
7630 the nested function loads its gp on entry. */
7631 if (crtl->has_nonlocal_goto)
7632 return 1;
7633
7634 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7635 Even if we are a static function, we still need to do this in case
7636 our address is taken and passed to something like qsort. */
7637
7638 push_topmost_sequence ();
7639 insn = get_insns ();
7640 pop_topmost_sequence ();
7641
7642 for (; insn; insn = NEXT_INSN (insn))
7643 if (NONDEBUG_INSN_P (insn)
7644 && GET_CODE (PATTERN (insn)) != USE
7645 && GET_CODE (PATTERN (insn)) != CLOBBER
7646 && get_attr_usegp (insn))
7647 return 1;
7648
7649 return 0;
7650 }
7651
7652 \f
7653 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7654 sequences. */
7655
7656 static rtx_insn *
7657 set_frame_related_p (void)
7658 {
7659 rtx_insn *seq = get_insns ();
7660 rtx_insn *insn;
7661
7662 end_sequence ();
7663
7664 if (!seq)
7665 return NULL;
7666
7667 if (INSN_P (seq))
7668 {
7669 insn = seq;
7670 while (insn != NULL_RTX)
7671 {
7672 RTX_FRAME_RELATED_P (insn) = 1;
7673 insn = NEXT_INSN (insn);
7674 }
7675 seq = emit_insn (seq);
7676 }
7677 else
7678 {
7679 seq = emit_insn (seq);
7680 RTX_FRAME_RELATED_P (seq) = 1;
7681 }
7682 return seq;
7683 }
7684
7685 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
7686
7687 /* Generates a store with the proper unwind info attached. VALUE is
7688 stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG
7689 contains SP+FRAME_BIAS, and that is the unwind info that should be
7690 generated. If FRAME_REG != VALUE, then VALUE is being stored on
7691 behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */
7692
7693 static void
7694 emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7695 HOST_WIDE_INT base_ofs, rtx frame_reg)
7696 {
7697 rtx addr, mem;
7698 rtx_insn *insn;
7699
7700 addr = plus_constant (Pmode, base_reg, base_ofs);
7701 mem = gen_frame_mem (DImode, addr);
7702
7703 insn = emit_move_insn (mem, value);
7704 RTX_FRAME_RELATED_P (insn) = 1;
7705
7706 if (frame_bias || value != frame_reg)
7707 {
7708 if (frame_bias)
7709 {
7710 addr = plus_constant (Pmode, stack_pointer_rtx,
7711 frame_bias + base_ofs);
7712 mem = gen_rtx_MEM (DImode, addr);
7713 }
7714
7715 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7716 gen_rtx_SET (VOIDmode, mem, frame_reg));
7717 }
7718 }
7719
7720 static void
7721 emit_frame_store (unsigned int regno, rtx base_reg,
7722 HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7723 {
7724 rtx reg = gen_rtx_REG (DImode, regno);
7725 emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7726 }
7727
7728 /* Compute the frame size. SIZE is the size of the "naked" frame
7729 and SA_SIZE is the size of the register save area. */
7730
7731 static HOST_WIDE_INT
7732 compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size)
7733 {
7734 if (TARGET_ABI_OPEN_VMS)
7735 return ALPHA_ROUND (sa_size
7736 + (alpha_procedure_type == PT_STACK ? 8 : 0)
7737 + size
7738 + crtl->args.pretend_args_size);
7739 else
7740 return ALPHA_ROUND (crtl->outgoing_args_size)
7741 + sa_size
7742 + ALPHA_ROUND (size
7743 + crtl->args.pretend_args_size);
7744 }
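/* For illustration (OSF): with 40 bytes of locals, a 32-byte save area and
   16 bytes of outgoing arguments, the frame is
   ALPHA_ROUND (16) + 32 + ALPHA_ROUND (40) == 16 + 32 + 48 == 96 bytes,
   ALPHA_ROUND rounding up to the 16-byte stack alignment.  */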
7745
7746 /* Write function prologue. */
7747
7748 /* On vms we have two kinds of functions:
7749
7750 - stack frame (PROC_STACK)
7751 these are 'normal' functions with local variables which
7752 call other functions
7753 - register frame (PROC_REGISTER)
7754 keeps all data in registers, needs no stack
7755
7756 We must pass this to the assembler so it can generate the
7757 proper pdsc (procedure descriptor)
7758 This is done with the '.pdesc' command.
7759
7760 On non-VMS targets, we don't really differentiate between the two, as we can
7761 simply allocate stack without saving registers. */
7762
7763 void
7764 alpha_expand_prologue (void)
7765 {
7766 /* Registers to save. */
7767 unsigned long imask = 0;
7768 unsigned long fmask = 0;
7769 /* Stack space needed for pushing registers clobbered by us. */
7770 HOST_WIDE_INT sa_size, sa_bias;
7771 /* Complete stack size needed. */
7772 HOST_WIDE_INT frame_size;
7773 /* Probed stack size; it additionally includes the size of
7774 the "reserve region" if any. */
7775 HOST_WIDE_INT probed_size;
7776 /* Offset from base reg to register save area. */
7777 HOST_WIDE_INT reg_offset;
7778 rtx sa_reg;
7779 int i;
7780
7781 sa_size = alpha_sa_size ();
7782 frame_size = compute_frame_size (get_frame_size (), sa_size);
7783
7784 if (flag_stack_usage_info)
7785 current_function_static_stack_size = frame_size;
7786
7787 if (TARGET_ABI_OPEN_VMS)
7788 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7789 else
7790 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7791
7792 alpha_sa_mask (&imask, &fmask);
7793
7794 /* Emit an insn to reload GP, if needed. */
7795 if (TARGET_ABI_OSF)
7796 {
7797 alpha_function_needs_gp = alpha_does_function_need_gp ();
7798 if (alpha_function_needs_gp)
7799 emit_insn (gen_prologue_ldgp ());
7800 }
7801
7802 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7803 the call to mcount ourselves, rather than having the linker do it
7804 magically in response to -pg. Since _mcount has special linkage,
7805 don't represent the call as a call. */
7806 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7807 emit_insn (gen_prologue_mcount ());
7808
7809 /* Adjust the stack by the frame size. If the frame size is > 4096
7810 bytes, we need to be sure we probe somewhere in the first and last
7811 4096 bytes (we can probably get away without the latter test) and
7812 every 8192 bytes in between. If the frame size is > 32768, we
7813 do this in a loop. Otherwise, we generate the explicit probe
7814 instructions.
7815
7816 Note that we are only allowed to adjust sp once in the prologue. */
7817
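/* For example, with illustrative numbers: if probed_size were 20000, the
   code below would probe at SP-4096 and SP-12288, and, because
   20000 > 16384, add one more probe at SP-20000 when no registers are
   being saved or when -fstack-check is in effect.  */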
7818 probed_size = frame_size;
7819 if (flag_stack_check)
7820 probed_size += STACK_CHECK_PROTECT;
7821
7822 if (probed_size <= 32768)
7823 {
7824 if (probed_size > 4096)
7825 {
7826 int probed;
7827
7828 for (probed = 4096; probed < probed_size; probed += 8192)
7829 emit_insn (gen_probe_stack (GEN_INT (-probed)));
7830
7831 /* We only have to do this probe if we aren't saving registers or
7832 if we are probing beyond the frame because of -fstack-check. */
7833 if ((sa_size == 0 && probed_size > probed - 4096)
7834 || flag_stack_check)
7835 emit_insn (gen_probe_stack (GEN_INT (-probed_size)));
7836 }
7837
7838 if (frame_size != 0)
7839 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7840 GEN_INT (-frame_size))));
7841 }
7842 else
7843 {
7844 /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7845 number of 8192 byte blocks to probe. We then probe each block
7846 in the loop and then set SP to the proper location. If the
7847 amount remaining is > 4096, we have to do one more probe if we
7848 are not saving any registers or if we are probing beyond the
7849 frame because of -fstack-check. */
7850
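/* Illustrative numbers, assuming the probe loop steps the pointer down by
   8192 per block: for probed_size == 100000, blocks == 104096 / 8192 == 12
   and leftover == 104096 - 12 * 8192 == 5792.  The loop then probes
   twelve blocks starting from SP+4096, leaving the pointer at
   SP + 4096 - 98304, and the final SP of that pointer minus 5792 is
   SP - 100000, as required.  */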
7851 HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
7852 HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
7853 rtx ptr = gen_rtx_REG (DImode, 22);
7854 rtx count = gen_rtx_REG (DImode, 23);
7855 rtx seq;
7856
7857 emit_move_insn (count, GEN_INT (blocks));
7858 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
7859
7860 /* Because of the difficulty in emitting a new basic block this
7861 late in the compilation, generate the loop as a single insn. */
7862 emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7863
7864 if ((leftover > 4096 && sa_size == 0) || flag_stack_check)
7865 {
7866 rtx last = gen_rtx_MEM (DImode,
7867 plus_constant (Pmode, ptr, -leftover));
7868 MEM_VOLATILE_P (last) = 1;
7869 emit_move_insn (last, const0_rtx);
7870 }
7871
7872 if (flag_stack_check)
7873 {
7874 /* If -fstack-check is specified we have to load the entire
7875 constant into a register and subtract from the sp in one go,
7876 because the probed stack size is not equal to the frame size. */
7877 HOST_WIDE_INT lo, hi;
7878 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7879 hi = frame_size - lo;
7880
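/* Illustrative numbers: for frame_size == 0x18000 this gives
   lo == ((0x8000 ^ 0x8000) - 0x8000) == -0x8000 and hi == 0x20000, so
   hi + lo reconstructs the frame size exactly while lo stays within the
   signed 16-bit range of the add below.  */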
7881 emit_move_insn (ptr, GEN_INT (hi));
7882 emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7883 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7884 ptr));
7885 }
7886 else
7887 {
7888 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7889 GEN_INT (-leftover)));
7890 }
7891
7892 /* This alternative is special, because the DWARF code cannot
7893 possibly intuit through the loop above. So we invent this
7894 note for it to look at instead. */
7895 RTX_FRAME_RELATED_P (seq) = 1;
7896 add_reg_note (seq, REG_FRAME_RELATED_EXPR,
7897 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
7898 plus_constant (Pmode, stack_pointer_rtx,
7899 -frame_size)));
7900 }
7901
7902 /* Cope with very large offsets to the register save area. */
7903 sa_bias = 0;
7904 sa_reg = stack_pointer_rtx;
7905 if (reg_offset + sa_size > 0x8000)
7906 {
7907 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7908 rtx sa_bias_rtx;
7909
7910 if (low + sa_size <= 0x8000)
7911 sa_bias = reg_offset - low, reg_offset = low;
7912 else
7913 sa_bias = reg_offset, reg_offset = 0;
7914
7915 sa_reg = gen_rtx_REG (DImode, 24);
7916 sa_bias_rtx = GEN_INT (sa_bias);
7917
7918 if (add_operand (sa_bias_rtx, DImode))
7919 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7920 else
7921 {
7922 emit_move_insn (sa_reg, sa_bias_rtx);
7923 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7924 }
7925 }
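/* Illustrative numbers for the bias above: with reg_offset == 0x9000 and
   sa_size == 0x100, low == 0x1000 - 0x8000 == -0x7000 and
   low + sa_size <= 0x8000, so sa_bias becomes 0x10000 and reg_offset
   becomes -0x7000.  The saves below then use SP+0x10000 as the base with
   small negative offsets, keeping every displacement within the signed
   16-bit range.  */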
7926
7927 /* Save regs in stack order. Beginning with VMS PV. */
7928 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7929 emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7930
7931 /* Save register RA next. */
7932 if (imask & (1UL << REG_RA))
7933 {
7934 emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset);
7935 imask &= ~(1UL << REG_RA);
7936 reg_offset += 8;
7937 }
7938
7939 /* Now save any other registers required to be saved. */
7940 for (i = 0; i < 31; i++)
7941 if (imask & (1UL << i))
7942 {
7943 emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7944 reg_offset += 8;
7945 }
7946
7947 for (i = 0; i < 31; i++)
7948 if (fmask & (1UL << i))
7949 {
7950 emit_frame_store (i+32, sa_reg, sa_bias, reg_offset);
7951 reg_offset += 8;
7952 }
7953
7954 if (TARGET_ABI_OPEN_VMS)
7955 {
7956 /* Register frame procedures save the fp. */
7957 if (alpha_procedure_type == PT_REGISTER)
7958 {
7959 rtx_insn *insn =
7960 emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7961 hard_frame_pointer_rtx);
7962 add_reg_note (insn, REG_CFA_REGISTER, NULL);
7963 RTX_FRAME_RELATED_P (insn) = 1;
7964 }
7965
7966 if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7967 emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7968 gen_rtx_REG (DImode, REG_PV)));
7969
7970 if (alpha_procedure_type != PT_NULL
7971 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7972 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7973
7974 /* If we have to allocate space for outgoing args, do it now. */
7975 if (crtl->outgoing_args_size != 0)
7976 {
7977 rtx_insn *seq
7978 = emit_move_insn (stack_pointer_rtx,
7979 plus_constant
7980 (Pmode, hard_frame_pointer_rtx,
7981 - (ALPHA_ROUND
7982 (crtl->outgoing_args_size))));
7983
7984 /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7985 if ! frame_pointer_needed. Setting the bit will change the CFA
7986 computation rule to use sp again, which would be wrong if we had
7987 frame_pointer_needed, as this means sp might move unpredictably
7988 later on.
7989
7990 Also, note that
7991 frame_pointer_needed
7992 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7993 and
7994 crtl->outgoing_args_size != 0
7995 => alpha_procedure_type != PT_NULL,
7996
7997 so when we are not setting the bit here, we are guaranteed to
7998 have emitted an FRP frame pointer update just before. */
7999 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
8000 }
8001 }
8002 else
8003 {
8004 /* If we need a frame pointer, set it from the stack pointer. */
8005 if (frame_pointer_needed)
8006 {
8007 if (TARGET_CAN_FAULT_IN_PROLOGUE)
8008 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
8009 else
8010 /* This must always be the last instruction in the
8011 prologue, thus we emit a special move + clobber. */
8012 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
8013 stack_pointer_rtx, sa_reg)));
8014 }
8015 }
8016
8017 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
8018 the prologue, for exception handling reasons, we cannot do this for
8019 any insn that might fault. We could prevent this for mems with a
8020 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
8021 have to prevent all such scheduling with a blockage.
8022
8023 Linux, on the other hand, never bothered to implement OSF/1's
8024 exception handling, and so doesn't care about such things. Anyone
8025 planning to use dwarf2 frame-unwind info can also omit the blockage. */
8026
8027 if (! TARGET_CAN_FAULT_IN_PROLOGUE)
8028 emit_insn (gen_blockage ());
8029 }
8030
8031 /* Count the number of .file directives, so that .loc is up to date. */
8032 int num_source_filenames = 0;
8033
8034 /* Output the textual info surrounding the prologue. */
8035
8036 void
8037 alpha_start_function (FILE *file, const char *fnname,
8038 tree decl ATTRIBUTE_UNUSED)
8039 {
8040 unsigned long imask = 0;
8041 unsigned long fmask = 0;
8042 /* Stack space needed for pushing registers clobbered by us. */
8043 HOST_WIDE_INT sa_size;
8044 /* Complete stack size needed. */
8045 unsigned HOST_WIDE_INT frame_size;
8046 /* The maximum debuggable frame size. */
8047 unsigned HOST_WIDE_INT max_frame_size = 1UL << 31;
8048 /* Offset from base reg to register save area. */
8049 HOST_WIDE_INT reg_offset;
8050 char *entry_label = (char *) alloca (strlen (fnname) + 6);
8051 char *tramp_label = (char *) alloca (strlen (fnname) + 6);
8052 int i;
8053
8054 #if TARGET_ABI_OPEN_VMS
8055 vms_start_function (fnname);
8056 #endif
8057
8058 alpha_fnname = fnname;
8059 sa_size = alpha_sa_size ();
8060 frame_size = compute_frame_size (get_frame_size (), sa_size);
8061
8062 if (TARGET_ABI_OPEN_VMS)
8063 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8064 else
8065 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8066
8067 alpha_sa_mask (&imask, &fmask);
8068
8069 /* Issue function start and label. */
8070 if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
8071 {
8072 fputs ("\t.ent ", file);
8073 assemble_name (file, fnname);
8074 putc ('\n', file);
8075
8076 /* If the function needs GP, we'll write the "..ng" label there.
8077 Otherwise, do it here. */
8078 if (TARGET_ABI_OSF
8079 && ! alpha_function_needs_gp
8080 && ! cfun->is_thunk)
8081 {
8082 putc ('$', file);
8083 assemble_name (file, fnname);
8084 fputs ("..ng:\n", file);
8085 }
8086 }
8087 /* Nested functions on VMS that are potentially called via trampoline
8088 get a special transfer entry point that loads the called function's
8089 procedure descriptor and static chain. */
8090 if (TARGET_ABI_OPEN_VMS
8091 && !TREE_PUBLIC (decl)
8092 && DECL_CONTEXT (decl)
8093 && !TYPE_P (DECL_CONTEXT (decl))
8094 && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
8095 {
8096 strcpy (tramp_label, fnname);
8097 strcat (tramp_label, "..tr");
8098 ASM_OUTPUT_LABEL (file, tramp_label);
8099 fprintf (file, "\tldq $1,24($27)\n");
8100 fprintf (file, "\tldq $27,16($27)\n");
8101 }
8102
8103 strcpy (entry_label, fnname);
8104 if (TARGET_ABI_OPEN_VMS)
8105 strcat (entry_label, "..en");
8106
8107 ASM_OUTPUT_LABEL (file, entry_label);
8108 inside_function = TRUE;
8109
8110 if (TARGET_ABI_OPEN_VMS)
8111 fprintf (file, "\t.base $%d\n", vms_base_regno);
8112
8113 if (TARGET_ABI_OSF
8114 && TARGET_IEEE_CONFORMANT
8115 && !flag_inhibit_size_directive)
8116 {
8117 /* Set flags in procedure descriptor to request IEEE-conformant
8118 math-library routines. The value we set it to is PDSC_EXC_IEEE
8119 (/usr/include/pdsc.h). */
8120 fputs ("\t.eflag 48\n", file);
8121 }
8122
8123 /* Set up offsets to alpha virtual arg/local debugging pointer. */
8124 alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
8125 alpha_arg_offset = -frame_size + 48;
8126
8127 /* Describe our frame. If the frame size doesn't fit in a signed 32-bit integer,
8128 print it as zero to avoid an assembler error. We won't be
8129 properly describing such a frame, but that's the best we can do. */
8130 if (TARGET_ABI_OPEN_VMS)
8131 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
8132 HOST_WIDE_INT_PRINT_DEC "\n",
8133 vms_unwind_regno,
8134 frame_size >= (1UL << 31) ? 0 : frame_size,
8135 reg_offset);
8136 else if (!flag_inhibit_size_directive)
8137 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
8138 (frame_pointer_needed
8139 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
8140 frame_size >= max_frame_size ? 0 : frame_size,
8141 crtl->args.pretend_args_size);
8142
8143 /* Describe which registers were spilled. */
8144 if (TARGET_ABI_OPEN_VMS)
8145 {
8146 if (imask)
8147 /* ??? Does VMS care if mask contains ra? The old code didn't
8148 set it, so I don't set it here. */
8149 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
8150 if (fmask)
8151 fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
8152 if (alpha_procedure_type == PT_REGISTER)
8153 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
8154 }
8155 else if (!flag_inhibit_size_directive)
8156 {
8157 if (imask)
8158 {
8159 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
8160 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8161
8162 for (i = 0; i < 32; ++i)
8163 if (imask & (1UL << i))
8164 reg_offset += 8;
8165 }
8166
8167 if (fmask)
8168 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
8169 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8170 }
8171
8172 #if TARGET_ABI_OPEN_VMS
8173 /* If a user condition handler has been installed at some point, emit
8174 the procedure descriptor bits to point the Condition Handling Facility
8175 at the indirection wrapper, and state the fp offset at which the user
8176 handler may be found. */
8177 if (cfun->machine->uses_condition_handler)
8178 {
8179 fprintf (file, "\t.handler __gcc_shell_handler\n");
8180 fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
8181 }
8182
8183 #ifdef TARGET_VMS_CRASH_DEBUG
8184 /* Support of minimal traceback info. */
8185 switch_to_section (readonly_data_section);
8186 fprintf (file, "\t.align 3\n");
8187 assemble_name (file, fnname); fputs ("..na:\n", file);
8188 fputs ("\t.ascii \"", file);
8189 assemble_name (file, fnname);
8190 fputs ("\\0\"\n", file);
8191 switch_to_section (text_section);
8192 #endif
8193 #endif /* TARGET_ABI_OPEN_VMS */
8194 }
8195
8196 /* Emit the .prologue note at the scheduled end of the prologue. */
8197
8198 static void
8199 alpha_output_function_end_prologue (FILE *file)
8200 {
8201 if (TARGET_ABI_OPEN_VMS)
8202 fputs ("\t.prologue\n", file);
8203 else if (!flag_inhibit_size_directive)
8204 fprintf (file, "\t.prologue %d\n",
8205 alpha_function_needs_gp || cfun->is_thunk);
8206 }
8207
8208 /* Write function epilogue. */
8209
8210 void
8211 alpha_expand_epilogue (void)
8212 {
8213 /* Registers to save. */
8214 unsigned long imask = 0;
8215 unsigned long fmask = 0;
8216 /* Stack space needed for pushing registers clobbered by us. */
8217 HOST_WIDE_INT sa_size;
8218 /* Complete stack size needed. */
8219 HOST_WIDE_INT frame_size;
8220 /* Offset from base reg to register save area. */
8221 HOST_WIDE_INT reg_offset;
8222 int fp_is_frame_pointer, fp_offset;
8223 rtx sa_reg, sa_reg_exp = NULL;
8224 rtx sp_adj1, sp_adj2, mem, reg, insn;
8225 rtx eh_ofs;
8226 rtx cfa_restores = NULL_RTX;
8227 int i;
8228
8229 sa_size = alpha_sa_size ();
8230 frame_size = compute_frame_size (get_frame_size (), sa_size);
8231
8232 if (TARGET_ABI_OPEN_VMS)
8233 {
8234 if (alpha_procedure_type == PT_STACK)
8235 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8236 else
8237 reg_offset = 0;
8238 }
8239 else
8240 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8241
8242 alpha_sa_mask (&imask, &fmask);
8243
8244 fp_is_frame_pointer
8245 = (TARGET_ABI_OPEN_VMS
8246 ? alpha_procedure_type == PT_STACK
8247 : frame_pointer_needed);
8248 fp_offset = 0;
8249 sa_reg = stack_pointer_rtx;
8250
8251 if (crtl->calls_eh_return)
8252 eh_ofs = EH_RETURN_STACKADJ_RTX;
8253 else
8254 eh_ofs = NULL_RTX;
8255
8256 if (sa_size)
8257 {
8258 /* If we have a frame pointer, restore SP from it. */
8259 if (TARGET_ABI_OPEN_VMS
8260 ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8261 : frame_pointer_needed)
8262 emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
8263
8264 /* Cope with very large offsets to the register save area. */
8265 if (reg_offset + sa_size > 0x8000)
8266 {
8267 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8268 HOST_WIDE_INT bias;
8269
8270 if (low + sa_size <= 0x8000)
8271 bias = reg_offset - low, reg_offset = low;
8272 else
8273 bias = reg_offset, reg_offset = 0;
8274
8275 sa_reg = gen_rtx_REG (DImode, 22);
8276 sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias);
8277
8278 emit_move_insn (sa_reg, sa_reg_exp);
8279 }
8280
8281 /* Restore registers in order, excepting a true frame pointer. */
8282
8283 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, reg_offset));
8284 reg = gen_rtx_REG (DImode, REG_RA);
8285 emit_move_insn (reg, mem);
8286 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8287
8288 reg_offset += 8;
8289 imask &= ~(1UL << REG_RA);
8290
8291 for (i = 0; i < 31; ++i)
8292 if (imask & (1UL << i))
8293 {
8294 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8295 fp_offset = reg_offset;
8296 else
8297 {
8298 mem = gen_frame_mem (DImode,
8299 plus_constant (Pmode, sa_reg,
8300 reg_offset));
8301 reg = gen_rtx_REG (DImode, i);
8302 emit_move_insn (reg, mem);
8303 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
8304 cfa_restores);
8305 }
8306 reg_offset += 8;
8307 }
8308
8309 for (i = 0; i < 31; ++i)
8310 if (fmask & (1UL << i))
8311 {
8312 mem = gen_frame_mem (DFmode, plus_constant (Pmode, sa_reg,
8313 reg_offset));
8314 reg = gen_rtx_REG (DFmode, i+32);
8315 emit_move_insn (reg, mem);
8316 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8317 reg_offset += 8;
8318 }
8319 }
8320
8321 if (frame_size || eh_ofs)
8322 {
8323 sp_adj1 = stack_pointer_rtx;
8324
8325 if (eh_ofs)
8326 {
8327 sp_adj1 = gen_rtx_REG (DImode, 23);
8328 emit_move_insn (sp_adj1,
8329 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8330 }
8331
8332 /* If the stack size is large, begin computation into a temporary
8333 register so as not to interfere with a potential fp restore,
8334 which must be consecutive with an SP restore. */
8335 if (frame_size < 32768 && !cfun->calls_alloca)
8336 sp_adj2 = GEN_INT (frame_size);
8337 else if (frame_size < 0x40007fffL)
8338 {
8339 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8340
8341 sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low);
8342 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8343 sp_adj1 = sa_reg;
8344 else
8345 {
8346 sp_adj1 = gen_rtx_REG (DImode, 23);
8347 emit_move_insn (sp_adj1, sp_adj2);
8348 }
8349 sp_adj2 = GEN_INT (low);
8350 }
8351 else
8352 {
8353 rtx tmp = gen_rtx_REG (DImode, 23);
8354 sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
8355 if (!sp_adj2)
8356 {
8357 /* We can't emit new constants to memory this late, as far as
8358 we know, so build the value up in pieces. */
8359 sp_adj2 = alpha_emit_set_long_const (tmp, frame_size,
8360 -(frame_size < 0));
8361 gcc_assert (sp_adj2);
8362 }
8363 }
8364
8365 /* From now on, things must be in order. So emit blockages. */
8366
8367 /* Restore the frame pointer. */
8368 if (fp_is_frame_pointer)
8369 {
8370 emit_insn (gen_blockage ());
8371 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg,
8372 fp_offset));
8373 emit_move_insn (hard_frame_pointer_rtx, mem);
8374 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8375 hard_frame_pointer_rtx, cfa_restores);
8376 }
8377 else if (TARGET_ABI_OPEN_VMS)
8378 {
8379 emit_insn (gen_blockage ());
8380 emit_move_insn (hard_frame_pointer_rtx,
8381 gen_rtx_REG (DImode, vms_save_fp_regno));
8382 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8383 hard_frame_pointer_rtx, cfa_restores);
8384 }
8385
8386 /* Restore the stack pointer. */
8387 emit_insn (gen_blockage ());
8388 if (sp_adj2 == const0_rtx)
8389 insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
8390 else
8391 insn = emit_move_insn (stack_pointer_rtx,
8392 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
8393 REG_NOTES (insn) = cfa_restores;
8394 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
8395 RTX_FRAME_RELATED_P (insn) = 1;
8396 }
8397 else
8398 {
8399 gcc_assert (cfa_restores == NULL);
8400
8401 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8402 {
8403 emit_insn (gen_blockage ());
8404 insn = emit_move_insn (hard_frame_pointer_rtx,
8405 gen_rtx_REG (DImode, vms_save_fp_regno));
8406 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8407 RTX_FRAME_RELATED_P (insn) = 1;
8408 }
8409 }
8410 }
8411 \f
8412 /* Output the rest of the textual info surrounding the epilogue. */
8413
8414 void
8415 alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8416 {
8417 rtx_insn *insn;
8418
8419 /* We output a nop after noreturn calls at the very end of the function to
8420 ensure that the return address always remains in the caller's code range,
8421 as not doing so might confuse unwinding engines. */
8422 insn = get_last_insn ();
8423 if (!INSN_P (insn))
8424 insn = prev_active_insn (insn);
8425 if (insn && CALL_P (insn))
8426 output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
8427
8428 #if TARGET_ABI_OPEN_VMS
8429 /* Write the linkage entries. */
8430 alpha_write_linkage (file, fnname);
8431 #endif
8432
8433 /* End the function. */
8434 if (TARGET_ABI_OPEN_VMS
8435 || !flag_inhibit_size_directive)
8436 {
8437 fputs ("\t.end ", file);
8438 assemble_name (file, fnname);
8439 putc ('\n', file);
8440 }
8441 inside_function = FALSE;
8442 }
8443
8444 #if TARGET_ABI_OSF
8445 /* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8446
8447 In order to avoid the hordes of differences between generated code
8448 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8449 lots of code loading up large constants, generate rtl and emit it
8450 instead of going straight to text.
8451
8452 Not sure why this idea hasn't been explored before... */
8453
8454 static void
8455 alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8456 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8457 tree function)
8458 {
8459 HOST_WIDE_INT hi, lo;
8460 rtx this_rtx, funexp;
8461 rtx_insn *insn;
8462
8463 /* We always require a valid GP. */
8464 emit_insn (gen_prologue_ldgp ());
8465 emit_note (NOTE_INSN_PROLOGUE_END);
8466
8467 /* Find the "this" pointer. If the function returns a structure,
8468 the structure return pointer is in $16. */
8469 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8470 this_rtx = gen_rtx_REG (Pmode, 17);
8471 else
8472 this_rtx = gen_rtx_REG (Pmode, 16);
8473
8474 /* Add DELTA. When possible we use ldah+lda. Otherwise load the
8475 entire constant for the add. */
8476 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8477 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8478 if (hi + lo == delta)
8479 {
8480 if (hi)
8481 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
8482 if (lo)
8483 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
8484 }
8485 else
8486 {
8487 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0),
8488 delta, -(delta < 0));
8489 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8490 }
8491
8492 /* Add a delta stored in the vtable at VCALL_OFFSET. */
8493 if (vcall_offset)
8494 {
8495 rtx tmp, tmp2;
8496
8497 tmp = gen_rtx_REG (Pmode, 0);
8498 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
8499
8500 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8501 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8502 if (hi + lo == vcall_offset)
8503 {
8504 if (hi)
8505 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8506 }
8507 else
8508 {
8509 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8510 vcall_offset, -(vcall_offset < 0));
8511 emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8512 lo = 0;
8513 }
8514 if (lo)
8515 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8516 else
8517 tmp2 = tmp;
8518 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8519
8520 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8521 }
8522
8523 /* Generate a tail call to the target function. */
8524 if (! TREE_USED (function))
8525 {
8526 assemble_external (function);
8527 TREE_USED (function) = 1;
8528 }
8529 funexp = XEXP (DECL_RTL (function), 0);
8530 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8531 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8532 SIBLING_CALL_P (insn) = 1;
8533
8534 /* Run just enough of rest_of_compilation to get the insns emitted.
8535 There's not really enough bulk here to make other passes such as
8536 instruction scheduling worthwhile. Note that use_thunk calls
8537 assemble_start_function and assemble_end_function. */
8538 insn = get_insns ();
8539 shorten_branches (insn);
8540 final_start_function (insn, file, 1);
8541 final (insn, file, 1);
8542 final_end_function ();
8543 }
8544 #endif /* TARGET_ABI_OSF */
8545 \f
8546 /* Debugging support. */
8547
8548 #include "gstab.h"
8549
8550 /* Name of the file containing the current function. */
8551
8552 static const char *current_function_file = "";
8553
8554 /* Offsets to alpha virtual arg/local debugging pointers. */
8555
8556 long alpha_arg_offset;
8557 long alpha_auto_offset;
8558 \f
8559 /* Emit a new filename to a stream. */
8560
8561 void
8562 alpha_output_filename (FILE *stream, const char *name)
8563 {
8564 static int first_time = TRUE;
8565
8566 if (first_time)
8567 {
8568 first_time = FALSE;
8569 ++num_source_filenames;
8570 current_function_file = name;
8571 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8572 output_quoted_string (stream, name);
8573 fprintf (stream, "\n");
8574 }
8575
8576 else if (name != current_function_file
8577 && strcmp (name, current_function_file) != 0)
8578 {
8579 ++num_source_filenames;
8580 current_function_file = name;
8581 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8582
8583 output_quoted_string (stream, name);
8584 fprintf (stream, "\n");
8585 }
8586 }
8587 \f
8588 /* Structure to show the current status of registers and memory. */
8589
8590 struct shadow_summary
8591 {
8592 struct {
8593 unsigned int i : 31; /* Mask of int regs */
8594 unsigned int fp : 31; /* Mask of fp regs */
8595 unsigned int mem : 1; /* mem == imem | fpmem */
8596 } used, defd;
8597 };
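/* For illustration, with made-up register numbers: an insn such as
   (set (reg:DI 3) (plus:DI (reg:DI 1) (reg:DI 2))) is summarized as
   used.i == (1 << 1) | (1 << 2) and defd.i == (1 << 3); a store through
   memory additionally sets defd.mem.  */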
8598
8599 /* Summarize the effects of expression X on the machine. Update SUM, a pointer
8600 to the summary structure. SET is nonzero if the insn is setting the
8601 object, otherwise zero. */
8602
8603 static void
8604 summarize_insn (rtx x, struct shadow_summary *sum, int set)
8605 {
8606 const char *format_ptr;
8607 int i, j;
8608
8609 if (x == 0)
8610 return;
8611
8612 switch (GET_CODE (x))
8613 {
8614 /* ??? Note that this case would be incorrect if the Alpha had a
8615 ZERO_EXTRACT in SET_DEST. */
8616 case SET:
8617 summarize_insn (SET_SRC (x), sum, 0);
8618 summarize_insn (SET_DEST (x), sum, 1);
8619 break;
8620
8621 case CLOBBER:
8622 summarize_insn (XEXP (x, 0), sum, 1);
8623 break;
8624
8625 case USE:
8626 summarize_insn (XEXP (x, 0), sum, 0);
8627 break;
8628
8629 case ASM_OPERANDS:
8630 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8631 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8632 break;
8633
8634 case PARALLEL:
8635 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8636 summarize_insn (XVECEXP (x, 0, i), sum, 0);
8637 break;
8638
8639 case SUBREG:
8640 summarize_insn (SUBREG_REG (x), sum, 0);
8641 break;
8642
8643 case REG:
8644 {
8645 int regno = REGNO (x);
8646 unsigned long mask = ((unsigned long) 1) << (regno % 32);
8647
8648 if (regno == 31 || regno == 63)
8649 break;
8650
8651 if (set)
8652 {
8653 if (regno < 32)
8654 sum->defd.i |= mask;
8655 else
8656 sum->defd.fp |= mask;
8657 }
8658 else
8659 {
8660 if (regno < 32)
8661 sum->used.i |= mask;
8662 else
8663 sum->used.fp |= mask;
8664 }
8665 }
8666 break;
8667
8668 case MEM:
8669 if (set)
8670 sum->defd.mem = 1;
8671 else
8672 sum->used.mem = 1;
8673
8674 /* Find the regs used in memory address computation: */
8675 summarize_insn (XEXP (x, 0), sum, 0);
8676 break;
8677
8678 case CONST_INT: case CONST_DOUBLE:
8679 case SYMBOL_REF: case LABEL_REF: case CONST:
8680 case SCRATCH: case ASM_INPUT:
8681 break;
8682
8683 /* Handle common unary and binary ops for efficiency. */
8684 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
8685 case MOD: case UDIV: case UMOD: case AND: case IOR:
8686 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
8687 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
8688 case NE: case EQ: case GE: case GT: case LE:
8689 case LT: case GEU: case GTU: case LEU: case LTU:
8690 summarize_insn (XEXP (x, 0), sum, 0);
8691 summarize_insn (XEXP (x, 1), sum, 0);
8692 break;
8693
8694 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
8695 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
8696 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
8697 case SQRT: case FFS:
8698 summarize_insn (XEXP (x, 0), sum, 0);
8699 break;
8700
8701 default:
8702 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8703 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8704 switch (format_ptr[i])
8705 {
8706 case 'e':
8707 summarize_insn (XEXP (x, i), sum, 0);
8708 break;
8709
8710 case 'E':
8711 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8712 summarize_insn (XVECEXP (x, i, j), sum, 0);
8713 break;
8714
8715 case 'i':
8716 break;
8717
8718 default:
8719 gcc_unreachable ();
8720 }
8721 }
8722 }
8723
8724 /* Ensure a sufficient number of `trapb' insns are in the code when
8725 the user requests code with a trap precision of functions or
8726 instructions.
8727
8728 In naive mode, when the user requests a trap-precision of
8729 "instruction", a trapb is needed after every instruction that may
8730 generate a trap. This ensures that the code is resumption safe but
8731 it is also slow.
8732
8733 When optimizations are turned on, we delay issuing a trapb as long
8734 as possible. In this context, a trap shadow is the sequence of
8735 instructions that starts with a (potentially) trap generating
8736 instruction and extends to the next trapb or call_pal instruction
8737 (but GCC never generates call_pal by itself). We can delay (and
8738 therefore sometimes omit) a trapb subject to the following
8739 conditions:
8740
8741 (a) On entry to the trap shadow, if any Alpha register or memory
8742 location contains a value that is used as an operand value by some
8743 instruction in the trap shadow (live on entry), then no instruction
8744 in the trap shadow may modify the register or memory location.
8745
8746 (b) Within the trap shadow, the computation of the base register
8747 for a memory load or store instruction may not involve using the
8748 result of an instruction that might generate an UNPREDICTABLE
8749 result.
8750
8751 (c) Within the trap shadow, no register may be used more than once
8752 as a destination register. (This is to make life easier for the
8753 trap-handler.)
8754
8755 (d) The trap shadow may not include any branch instructions. */
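/* A small example of condition (a): if a trapping FP add reads $f1 and a
   later insn inside the same shadow overwrites $f1, the trap handler
   could no longer recover the original operand, so the pass below closes
   the shadow with a trapb before that insn.  */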
8756
8757 static void
8758 alpha_handle_trap_shadows (void)
8759 {
8760 struct shadow_summary shadow;
8761 int trap_pending, exception_nesting;
8762 rtx_insn *i, *n;
8763
8764 trap_pending = 0;
8765 exception_nesting = 0;
8766 shadow.used.i = 0;
8767 shadow.used.fp = 0;
8768 shadow.used.mem = 0;
8769 shadow.defd = shadow.used;
8770
8771 for (i = get_insns (); i ; i = NEXT_INSN (i))
8772 {
8773 if (NOTE_P (i))
8774 {
8775 switch (NOTE_KIND (i))
8776 {
8777 case NOTE_INSN_EH_REGION_BEG:
8778 exception_nesting++;
8779 if (trap_pending)
8780 goto close_shadow;
8781 break;
8782
8783 case NOTE_INSN_EH_REGION_END:
8784 exception_nesting--;
8785 if (trap_pending)
8786 goto close_shadow;
8787 break;
8788
8789 case NOTE_INSN_EPILOGUE_BEG:
8790 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8791 goto close_shadow;
8792 break;
8793 }
8794 }
8795 else if (trap_pending)
8796 {
8797 if (alpha_tp == ALPHA_TP_FUNC)
8798 {
8799 if (JUMP_P (i)
8800 && GET_CODE (PATTERN (i)) == RETURN)
8801 goto close_shadow;
8802 }
8803 else if (alpha_tp == ALPHA_TP_INSN)
8804 {
8805 if (optimize > 0)
8806 {
8807 struct shadow_summary sum;
8808
8809 sum.used.i = 0;
8810 sum.used.fp = 0;
8811 sum.used.mem = 0;
8812 sum.defd = sum.used;
8813
8814 switch (GET_CODE (i))
8815 {
8816 case INSN:
8817 /* Annoyingly, get_attr_trap will die on these. */
8818 if (GET_CODE (PATTERN (i)) == USE
8819 || GET_CODE (PATTERN (i)) == CLOBBER)
8820 break;
8821
8822 summarize_insn (PATTERN (i), &sum, 0);
8823
8824 if ((sum.defd.i & shadow.defd.i)
8825 || (sum.defd.fp & shadow.defd.fp))
8826 {
8827 /* (c) would be violated */
8828 goto close_shadow;
8829 }
8830
8831 /* Combine shadow with summary of current insn: */
8832 shadow.used.i |= sum.used.i;
8833 shadow.used.fp |= sum.used.fp;
8834 shadow.used.mem |= sum.used.mem;
8835 shadow.defd.i |= sum.defd.i;
8836 shadow.defd.fp |= sum.defd.fp;
8837 shadow.defd.mem |= sum.defd.mem;
8838
8839 if ((sum.defd.i & shadow.used.i)
8840 || (sum.defd.fp & shadow.used.fp)
8841 || (sum.defd.mem & shadow.used.mem))
8842 {
8843 /* (a) would be violated (also takes care of (b)) */
8844 gcc_assert (get_attr_trap (i) != TRAP_YES
8845 || (!(sum.defd.i & sum.used.i)
8846 && !(sum.defd.fp & sum.used.fp)));
8847
8848 goto close_shadow;
8849 }
8850 break;
8851
8852 case BARRIER:
8853 /* __builtin_unreachable can expand to no code at all,
8854 leaving (barrier) RTXes in the instruction stream. */
8855 goto close_shadow_notrapb;
8856
8857 case JUMP_INSN:
8858 case CALL_INSN:
8859 case CODE_LABEL:
8860 goto close_shadow;
8861
8862 default:
8863 gcc_unreachable ();
8864 }
8865 }
8866 else
8867 {
8868 close_shadow:
8869 n = emit_insn_before (gen_trapb (), i);
8870 PUT_MODE (n, TImode);
8871 PUT_MODE (i, TImode);
8872 close_shadow_notrapb:
8873 trap_pending = 0;
8874 shadow.used.i = 0;
8875 shadow.used.fp = 0;
8876 shadow.used.mem = 0;
8877 shadow.defd = shadow.used;
8878 }
8879 }
8880 }
8881
8882 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8883 && NONJUMP_INSN_P (i)
8884 && GET_CODE (PATTERN (i)) != USE
8885 && GET_CODE (PATTERN (i)) != CLOBBER
8886 && get_attr_trap (i) == TRAP_YES)
8887 {
8888 if (optimize && !trap_pending)
8889 summarize_insn (PATTERN (i), &shadow, 0);
8890 trap_pending = 1;
8891 }
8892 }
8893 }
8894 \f
8895 /* Alpha can only issue instruction groups simultaneously if they are
8896 suitably aligned. This is very processor-specific. */
8897 /* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8898 that are marked "fake". These instructions do not exist on that target,
8899 but it is possible to see these insns with deranged combinations of
8900 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting,
8901 choose a result at random. */
8902
8903 enum alphaev4_pipe {
8904 EV4_STOP = 0,
8905 EV4_IB0 = 1,
8906 EV4_IB1 = 2,
8907 EV4_IBX = 4
8908 };
8909
8910 enum alphaev5_pipe {
8911 EV5_STOP = 0,
8912 EV5_NONE = 1,
8913 EV5_E01 = 2,
8914 EV5_E0 = 4,
8915 EV5_E1 = 8,
8916 EV5_FAM = 16,
8917 EV5_FA = 32,
8918 EV5_FM = 64
8919 };
8920
8921 static enum alphaev4_pipe
8922 alphaev4_insn_pipe (rtx_insn *insn)
8923 {
8924 if (recog_memoized (insn) < 0)
8925 return EV4_STOP;
8926 if (get_attr_length (insn) != 4)
8927 return EV4_STOP;
8928
8929 switch (get_attr_type (insn))
8930 {
8931 case TYPE_ILD:
8932 case TYPE_LDSYM:
8933 case TYPE_FLD:
8934 case TYPE_LD_L:
8935 return EV4_IBX;
8936
8937 case TYPE_IADD:
8938 case TYPE_ILOG:
8939 case TYPE_ICMOV:
8940 case TYPE_ICMP:
8941 case TYPE_FST:
8942 case TYPE_SHIFT:
8943 case TYPE_IMUL:
8944 case TYPE_FBR:
8945 case TYPE_MVI: /* fake */
8946 return EV4_IB0;
8947
8948 case TYPE_IST:
8949 case TYPE_MISC:
8950 case TYPE_IBR:
8951 case TYPE_JSR:
8952 case TYPE_CALLPAL:
8953 case TYPE_FCPYS:
8954 case TYPE_FCMOV:
8955 case TYPE_FADD:
8956 case TYPE_FDIV:
8957 case TYPE_FMUL:
8958 case TYPE_ST_C:
8959 case TYPE_MB:
8960 case TYPE_FSQRT: /* fake */
8961 case TYPE_FTOI: /* fake */
8962 case TYPE_ITOF: /* fake */
8963 return EV4_IB1;
8964
8965 default:
8966 gcc_unreachable ();
8967 }
8968 }
8969
8970 static enum alphaev5_pipe
8971 alphaev5_insn_pipe (rtx_insn *insn)
8972 {
8973 if (recog_memoized (insn) < 0)
8974 return EV5_STOP;
8975 if (get_attr_length (insn) != 4)
8976 return EV5_STOP;
8977
8978 switch (get_attr_type (insn))
8979 {
8980 case TYPE_ILD:
8981 case TYPE_FLD:
8982 case TYPE_LDSYM:
8983 case TYPE_IADD:
8984 case TYPE_ILOG:
8985 case TYPE_ICMOV:
8986 case TYPE_ICMP:
8987 return EV5_E01;
8988
8989 case TYPE_IST:
8990 case TYPE_FST:
8991 case TYPE_SHIFT:
8992 case TYPE_IMUL:
8993 case TYPE_MISC:
8994 case TYPE_MVI:
8995 case TYPE_LD_L:
8996 case TYPE_ST_C:
8997 case TYPE_MB:
8998 case TYPE_FTOI: /* fake */
8999 case TYPE_ITOF: /* fake */
9000 return EV5_E0;
9001
9002 case TYPE_IBR:
9003 case TYPE_JSR:
9004 case TYPE_CALLPAL:
9005 return EV5_E1;
9006
9007 case TYPE_FCPYS:
9008 return EV5_FAM;
9009
9010 case TYPE_FBR:
9011 case TYPE_FCMOV:
9012 case TYPE_FADD:
9013 case TYPE_FDIV:
9014 case TYPE_FSQRT: /* fake */
9015 return EV5_FA;
9016
9017 case TYPE_FMUL:
9018 return EV5_FM;
9019
9020 default:
9021 gcc_unreachable ();
9022 }
9023 }
9024
9025 /* IN_USE is a mask of the slots currently filled within the insn group.
9026 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
9027 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
9028
9029 LEN is, of course, the length of the group in bytes. */
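/* Sketch of how the mask evolves (example only): a group beginning with a
   load (EV4_IBX) records EV4_IB0|EV4_IBX; a following add (EV4_IB0) finds
   IB0 taken but IBX set and IB1 free, so the load is assumed to be swapped
   into IB1 and the mask becomes IB0|IB1|IBX, with LEN == 8.  */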
9030
9031 static rtx_insn *
9032 alphaev4_next_group (rtx_insn *insn, int *pin_use, int *plen)
9033 {
9034 int len, in_use;
9035
9036 len = in_use = 0;
9037
9038 if (! INSN_P (insn)
9039 || GET_CODE (PATTERN (insn)) == CLOBBER
9040 || GET_CODE (PATTERN (insn)) == USE)
9041 goto next_and_done;
9042
9043 while (1)
9044 {
9045 enum alphaev4_pipe pipe;
9046
9047 pipe = alphaev4_insn_pipe (insn);
9048 switch (pipe)
9049 {
9050 case EV4_STOP:
9051 /* Force complex instructions to start new groups. */
9052 if (in_use)
9053 goto done;
9054
9055 /* If this is a completely unrecognized insn, it's an asm.
9056 We don't know how long it is, so record length as -1 to
9057 signal a needed realignment. */
9058 if (recog_memoized (insn) < 0)
9059 len = -1;
9060 else
9061 len = get_attr_length (insn);
9062 goto next_and_done;
9063
9064 case EV4_IBX:
9065 if (in_use & EV4_IB0)
9066 {
9067 if (in_use & EV4_IB1)
9068 goto done;
9069 in_use |= EV4_IB1;
9070 }
9071 else
9072 in_use |= EV4_IB0 | EV4_IBX;
9073 break;
9074
9075 case EV4_IB0:
9076 if (in_use & EV4_IB0)
9077 {
9078 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
9079 goto done;
9080 in_use |= EV4_IB1;
9081 }
9082 in_use |= EV4_IB0;
9083 break;
9084
9085 case EV4_IB1:
9086 if (in_use & EV4_IB1)
9087 goto done;
9088 in_use |= EV4_IB1;
9089 break;
9090
9091 default:
9092 gcc_unreachable ();
9093 }
9094 len += 4;
9095
9096 /* Haifa doesn't do well scheduling branches. */
9097 if (JUMP_P (insn))
9098 goto next_and_done;
9099
9100 next:
9101 insn = next_nonnote_insn (insn);
9102
9103 if (!insn || ! INSN_P (insn))
9104 goto done;
9105
9106 /* Let Haifa tell us where it thinks insn group boundaries are. */
9107 if (GET_MODE (insn) == TImode)
9108 goto done;
9109
9110 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9111 goto next;
9112 }
9113
9114 next_and_done:
9115 insn = next_nonnote_insn (insn);
9116
9117 done:
9118 *plen = len;
9119 *pin_use = in_use;
9120 return insn;
9121 }
9122
9123 /* IN_USE is a mask of the slots currently filled within the insn group.
9124 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
9125 the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
9126
9127 LEN is, of course, the length of the group in bytes. */
9128
9129 static rtx_insn *
9130 alphaev5_next_group (rtx_insn *insn, int *pin_use, int *plen)
9131 {
9132 int len, in_use;
9133
9134 len = in_use = 0;
9135
9136 if (! INSN_P (insn)
9137 || GET_CODE (PATTERN (insn)) == CLOBBER
9138 || GET_CODE (PATTERN (insn)) == USE)
9139 goto next_and_done;
9140
9141 while (1)
9142 {
9143 enum alphaev5_pipe pipe;
9144
9145 pipe = alphaev5_insn_pipe (insn);
9146 switch (pipe)
9147 {
9148 case EV5_STOP:
9149 /* Force complex instructions to start new groups. */
9150 if (in_use)
9151 goto done;
9152
9153 /* If this is a completely unrecognized insn, it's an asm.
9154 We don't know how long it is, so record length as -1 to
9155 signal a needed realignment. */
9156 if (recog_memoized (insn) < 0)
9157 len = -1;
9158 else
9159 len = get_attr_length (insn);
9160 goto next_and_done;
9161
9162 /* ??? Most of the cases below are ones we would like to assert can
9163 never happen, as they would indicate an error either in Haifa or
9164 in the scheduling description. Unfortunately, Haifa never
9165 schedules the last instruction of the BB, so we don't have
9166 an accurate TI bit to go by. */
9167 case EV5_E01:
9168 if (in_use & EV5_E0)
9169 {
9170 if (in_use & EV5_E1)
9171 goto done;
9172 in_use |= EV5_E1;
9173 }
9174 else
9175 in_use |= EV5_E0 | EV5_E01;
9176 break;
9177
9178 case EV5_E0:
9179 if (in_use & EV5_E0)
9180 {
9181 if (!(in_use & EV5_E01) || (in_use & EV5_E1))
9182 goto done;
9183 in_use |= EV5_E1;
9184 }
9185 in_use |= EV5_E0;
9186 break;
9187
9188 case EV5_E1:
9189 if (in_use & EV5_E1)
9190 goto done;
9191 in_use |= EV5_E1;
9192 break;
9193
9194 case EV5_FAM:
9195 if (in_use & EV5_FA)
9196 {
9197 if (in_use & EV5_FM)
9198 goto done;
9199 in_use |= EV5_FM;
9200 }
9201 else
9202 in_use |= EV5_FA | EV5_FAM;
9203 break;
9204
9205 case EV5_FA:
9206 if (in_use & EV5_FA)
9207 goto done;
9208 in_use |= EV5_FA;
9209 break;
9210
9211 case EV5_FM:
9212 if (in_use & EV5_FM)
9213 goto done;
9214 in_use |= EV5_FM;
9215 break;
9216
9217 case EV5_NONE:
9218 break;
9219
9220 default:
9221 gcc_unreachable ();
9222 }
9223 len += 4;
9224
9225 /* Haifa doesn't do well scheduling branches. */
9226 /* ??? If this is predicted not-taken, slotting continues, except
9227 that no more IBR, FBR, or JSR insns may be slotted. */
9228 if (JUMP_P (insn))
9229 goto next_and_done;
9230
9231 next:
9232 insn = next_nonnote_insn (insn);
9233
9234 if (!insn || ! INSN_P (insn))
9235 goto done;
9236
9237 /* Let Haifa tell us where it thinks insn group boundaries are. */
9238 if (GET_MODE (insn) == TImode)
9239 goto done;
9240
9241 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9242 goto next;
9243 }
9244
9245 next_and_done:
9246 insn = next_nonnote_insn (insn);
9247
9248 done:
9249 *plen = len;
9250 *pin_use = in_use;
9251 return insn;
9252 }
9253
9254 static rtx
9255 alphaev4_next_nop (int *pin_use)
9256 {
9257 int in_use = *pin_use;
9258 rtx nop;
9259
9260 if (!(in_use & EV4_IB0))
9261 {
9262 in_use |= EV4_IB0;
9263 nop = gen_nop ();
9264 }
9265 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9266 {
9267 in_use |= EV4_IB1;
9268 nop = gen_nop ();
9269 }
9270 else if (TARGET_FP && !(in_use & EV4_IB1))
9271 {
9272 in_use |= EV4_IB1;
9273 nop = gen_fnop ();
9274 }
9275 else
9276 nop = gen_unop ();
9277
9278 *pin_use = in_use;
9279 return nop;
9280 }
9281
9282 static rtx
9283 alphaev5_next_nop (int *pin_use)
9284 {
9285 int in_use = *pin_use;
9286 rtx nop;
9287
9288 if (!(in_use & EV5_E1))
9289 {
9290 in_use |= EV5_E1;
9291 nop = gen_nop ();
9292 }
9293 else if (TARGET_FP && !(in_use & EV5_FA))
9294 {
9295 in_use |= EV5_FA;
9296 nop = gen_fnop ();
9297 }
9298 else if (TARGET_FP && !(in_use & EV5_FM))
9299 {
9300 in_use |= EV5_FM;
9301 nop = gen_fnop ();
9302 }
9303 else
9304 nop = gen_unop ();
9305
9306 *pin_use = in_use;
9307 return nop;
9308 }
9309
9310 /* The instruction group alignment main loop. */
9311
9312 static void
9313 alpha_align_insns_1 (unsigned int max_align,
9314 rtx_insn *(*next_group) (rtx_insn *, int *, int *),
9315 rtx (*next_nop) (int *))
9316 {
9317 /* ALIGN is the known alignment for the insn group. */
9318 unsigned int align;
9319 /* OFS is the offset of the current insn in the insn group. */
9320 int ofs;
9321 int prev_in_use, in_use, len, ldgp;
9322 rtx_insn *i, *next;
9323
9324 /* Let shorten_branches take care of assigning alignments to code labels. */
9325 shorten_branches (get_insns ());
9326
9327 if (align_functions < 4)
9328 align = 4;
9329 else if ((unsigned int) align_functions < max_align)
9330 align = align_functions;
9331 else
9332 align = max_align;
9333
9334 ofs = prev_in_use = 0;
9335 i = get_insns ();
9336 if (NOTE_P (i))
9337 i = next_nonnote_insn (i);
9338
9339 ldgp = alpha_function_needs_gp ? 8 : 0;
9340
9341 while (i)
9342 {
9343 next = (*next_group) (i, &in_use, &len);
9344
9345 /* When we see a label, resync alignment etc. */
9346 if (LABEL_P (i))
9347 {
9348 unsigned int new_align = 1 << label_to_alignment (i);
9349
9350 if (new_align >= align)
9351 {
9352 align = new_align < max_align ? new_align : max_align;
9353 ofs = 0;
9354 }
9355
9356 else if (ofs & (new_align-1))
9357 ofs = (ofs | (new_align-1)) + 1;
9358 gcc_assert (!len);
9359 }
9360
9361 /* Handle complex instructions specially. */
9362 else if (in_use == 0)
9363 {
9364 /* Asms will have length < 0. This is a signal that we have
9365 lost alignment knowledge. Assume, however, that the asm
9366 will not mis-align instructions. */
9367 if (len < 0)
9368 {
9369 ofs = 0;
9370 align = 4;
9371 len = 0;
9372 }
9373 }
9374
9375 /* If the known alignment is smaller than the recognized insn group,
9376 realign the output. */
9377 else if ((int) align < len)
9378 {
9379 unsigned int new_log_align = len > 8 ? 4 : 3;
9380 rtx_insn *prev, *where;
9381
9382 where = prev = prev_nonnote_insn (i);
9383 if (!where || !LABEL_P (where))
9384 where = i;
9385
9386 /* Can't realign between a call and its gp reload. */
9387 if (! (TARGET_EXPLICIT_RELOCS
9388 && prev && CALL_P (prev)))
9389 {
9390 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9391 align = 1 << new_log_align;
9392 ofs = 0;
9393 }
9394 }
9395
9396 /* We may not insert padding inside the initial ldgp sequence. */
9397 else if (ldgp > 0)
9398 ldgp -= len;
9399
9400 /* If the group won't fit in the same INT16 as the previous,
9401 we need to add padding to keep the group together. Rather
9402 than simply leaving the insn filling to the assembler, we
9403 can make use of the knowledge of what sorts of instructions
9404 were issued in the previous group to make sure that all of
9405 the added nops are really free. */
9406 else if (ofs + len > (int) align)
9407 {
9408 int nop_count = (align - ofs) / 4;
9409 rtx_insn *where;
9410
9411 /* Insert nops before labels, branches, and calls to truly merge
9412 the execution of the nops with the previous instruction group. */
9413 where = prev_nonnote_insn (i);
9414 if (where)
9415 {
9416 if (LABEL_P (where))
9417 {
9418 rtx_insn *where2 = prev_nonnote_insn (where);
9419 if (where2 && JUMP_P (where2))
9420 where = where2;
9421 }
9422 else if (NONJUMP_INSN_P (where))
9423 where = i;
9424 }
9425 else
9426 where = i;
9427
9428 do
9429 emit_insn_before ((*next_nop)(&prev_in_use), where);
9430 while (--nop_count);
9431 ofs = 0;
9432 }
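/* Illustrative numbers for the padding above: with align == 16, ofs == 12
   and a new group of len == 8, the group would straddle the 16-byte
   boundary, so (16 - 12) / 4 == 1 nop chosen by next_nop is emitted and
   OFS is reset so the group starts at the boundary.  */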
9433
9434 ofs = (ofs + len) & (align - 1);
9435 prev_in_use = in_use;
9436 i = next;
9437 }
9438 }
9439
9440 static void
9441 alpha_align_insns (void)
9442 {
9443 if (alpha_tune == PROCESSOR_EV4)
9444 alpha_align_insns_1 (8, alphaev4_next_group, alphaev4_next_nop);
9445 else if (alpha_tune == PROCESSOR_EV5)
9446 alpha_align_insns_1 (16, alphaev5_next_group, alphaev5_next_nop);
9447 else
9448 gcc_unreachable ();
9449 }
9450
9451 /* Insert an unop between sibcall or noreturn function call and GP load. */
9452
9453 static void
9454 alpha_pad_function_end (void)
9455 {
9456 rtx_insn *insn, *next;
9457
9458 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9459 {
9460 if (!CALL_P (insn)
9461 || !(SIBLING_CALL_P (insn)
9462 || find_reg_note (insn, REG_NORETURN, NULL_RTX)))
9463 continue;
9464
9465 /* Make sure we do not split a call and its corresponding
9466 CALL_ARG_LOCATION note. */
9467 next = NEXT_INSN (insn);
9468 if (next == NULL)
9469 continue;
9470 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
9471 insn = next;
9472
9473 next = next_active_insn (insn);
9474 if (next)
9475 {
9476 rtx pat = PATTERN (next);
9477
9478 if (GET_CODE (pat) == SET
9479 && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
9480 && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
9481 emit_insn_after (gen_unop (), insn);
9482 }
9483 }
9484 }
9485 \f
9486 /* Machine dependent reorg pass. */
9487
9488 static void
9489 alpha_reorg (void)
9490 {
9491 /* Workaround for a linker error that triggers when an exception
9492 handler immediately follows a sibcall or a noreturn function.
9493
9494 In the sibcall case:
9495
9496 The instruction stream from an object file:
9497
9498 1d8: 00 00 fb 6b jmp (t12)
9499 1dc: 00 00 ba 27 ldah gp,0(ra)
9500 1e0: 00 00 bd 23 lda gp,0(gp)
9501 1e4: 00 00 7d a7 ldq t12,0(gp)
9502 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec>
9503
9504 was converted in the final link pass to:
9505
9506 12003aa88: 67 fa ff c3 br 120039428 <...>
9507 12003aa8c: 00 00 fe 2f unop
9508 12003aa90: 00 00 fe 2f unop
9509 12003aa94: 48 83 7d a7 ldq t12,-31928(gp)
9510 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec>
9511
9512 And in the noreturn case:
9513
9514 The instruction stream from an object file:
9515
9516 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58>
9517 58: 00 00 ba 27 ldah gp,0(ra)
9518 5c: 00 00 bd 23 lda gp,0(gp)
9519 60: 00 00 7d a7 ldq t12,0(gp)
9520 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68>
9521
9522 was converted in the final link pass to:
9523
9524 fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8>
9525 fdb28: 00 00 fe 2f unop
9526 fdb2c: 00 00 fe 2f unop
9527 fdb30: 30 82 7d a7 ldq t12,-32208(gp)
9528 fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68>
9529
9530 GP load instructions were wrongly cleared by the linker relaxation
9531 pass. This workaround prevents removal of GP loads by inserting
9532 an unop instruction between a sibcall or noreturn function call and
9533 the exception handler prologue. */
9534
9535 if (current_function_has_exception_handlers ())
9536 alpha_pad_function_end ();
9537 }
9538 \f
9539 static void
9540 alpha_file_start (void)
9541 {
9542 default_file_start ();
9543
9544 fputs ("\t.set noreorder\n", asm_out_file);
9545 fputs ("\t.set volatile\n", asm_out_file);
9546 if (TARGET_ABI_OSF)
9547 fputs ("\t.set noat\n", asm_out_file);
9548 if (TARGET_EXPLICIT_RELOCS)
9549 fputs ("\t.set nomacro\n", asm_out_file);
9550 if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9551 {
9552 const char *arch;
9553
9554 if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9555 arch = "ev6";
9556 else if (TARGET_MAX)
9557 arch = "pca56";
9558 else if (TARGET_BWX)
9559 arch = "ev56";
9560 else if (alpha_cpu == PROCESSOR_EV5)
9561 arch = "ev5";
9562 else
9563 arch = "ev4";
9564
9565 fprintf (asm_out_file, "\t.arch %s\n", arch);
9566 }
9567 }
9568
9569 /* Since we don't have a .dynbss section, we should not allow global
9570 relocations in the .rodata section. */
9571
9572 static int
9573 alpha_elf_reloc_rw_mask (void)
9574 {
9575 return flag_pic ? 3 : 2;
9576 }
9577
9578 /* Return a section for X. The only special thing we do here is to
9579 honor small data. */
9580
9581 static section *
9582 alpha_elf_select_rtx_section (enum machine_mode mode, rtx x,
9583 unsigned HOST_WIDE_INT align)
9584 {
9585 if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9586 /* ??? Consider using mergeable sdata sections. */
9587 return sdata_section;
9588 else
9589 return default_elf_select_rtx_section (mode, x, align);
9590 }
9591
9592 static unsigned int
9593 alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
9594 {
9595 unsigned int flags = 0;
9596
9597 if (strcmp (name, ".sdata") == 0
9598 || strncmp (name, ".sdata.", 7) == 0
9599 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9600 || strcmp (name, ".sbss") == 0
9601 || strncmp (name, ".sbss.", 6) == 0
9602 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9603 flags = SECTION_SMALL;
9604
9605 flags |= default_section_type_flags (decl, name, reloc);
9606 return flags;
9607 }
9608 \f
9609 /* Structure to collect function names for final output in link section. */
9610 /* Note that items marked with GTY can't be ifdef'ed out. */
9611
9612 enum reloc_kind
9613 {
9614 KIND_LINKAGE,
9615 KIND_CODEADDR
9616 };
9617
9618 struct GTY(()) alpha_links
9619 {
9620 rtx func;
9621 rtx linkage;
9622 enum reloc_kind rkind;
9623 };
9624
9625 #if TARGET_ABI_OPEN_VMS
9626
9627 /* Return the VMS argument type corresponding to MODE. */
9628
9629 enum avms_arg_type
9630 alpha_arg_type (enum machine_mode mode)
9631 {
9632 switch (mode)
9633 {
9634 case SFmode:
9635 return TARGET_FLOAT_VAX ? FF : FS;
9636 case DFmode:
9637 return TARGET_FLOAT_VAX ? FD : FT;
9638 default:
9639 return I64;
9640 }
9641 }
9642
9643 /* Return an rtx for an integer representing the VMS Argument Information
9644 register value. */
9645
9646 rtx
9647 alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9648 {
9649 unsigned HOST_WIDE_INT regval = cum.num_args;
9650 int i;
9651
9652 for (i = 0; i < 6; i++)
9653 regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9654
9655 return GEN_INT (regval);
9656 }
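/* For illustration: the low byte of the value holds the argument count and
   each cum.atypes[i] occupies the 3-bit field at bit 8 + 3*i, so the first
   argument's type sits in bits 8-10, the second in bits 11-13, and so
   on.  */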
9657 \f
9658
9659 /* Return a SYMBOL_REF representing the reference to the .linkage entry
9660 of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if
9661 this is the reference to the linkage pointer value, 0 if this is the
9662 reference to the function entry value. RFLAG is 1 if this a reduced
9663 reference (code address only), 0 if this is a full reference. */
9664
9665 rtx
9666 alpha_use_linkage (rtx func, bool lflag, bool rflag)
9667 {
9668 struct alpha_links *al = NULL;
9669 const char *name = XSTR (func, 0);
9670
9671 if (cfun->machine->links)
9672 {
9673 splay_tree_node lnode;
9674
9675 /* Is this name already defined? */
9676 lnode = splay_tree_lookup (cfun->machine->links, (splay_tree_key) name);
9677 if (lnode)
9678 al = (struct alpha_links *) lnode->value;
9679 }
9680 else
9681 cfun->machine->links = splay_tree_new_ggc
9682 ((splay_tree_compare_fn) strcmp,
9683 ggc_alloc_splay_tree_str_alpha_links_splay_tree_s,
9684 ggc_alloc_splay_tree_str_alpha_links_splay_tree_node_s);
9685
9686 if (al == NULL)
9687 {
9688 size_t buf_len;
9689 char *linksym;
9690 tree id;
9691
9692 if (name[0] == '*')
9693 name++;
9694
9695 /* Follow transparent alias, as this is used for CRTL translations. */
9696 id = maybe_get_identifier (name);
9697 if (id)
9698 {
9699 while (IDENTIFIER_TRANSPARENT_ALIAS (id))
9700 id = TREE_CHAIN (id);
9701 name = IDENTIFIER_POINTER (id);
9702 }
9703
9704 buf_len = strlen (name) + 8 + 9;
9705 linksym = (char *) alloca (buf_len);
9706 snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name);
9707
9708 al = ggc_alloc<alpha_links> ();
9709 al->func = func;
9710 al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym));
9711
9712 splay_tree_insert (cfun->machine->links,
9713 (splay_tree_key) ggc_strdup (name),
9714 (splay_tree_value) al);
9715 }
9716
9717 al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE;
9718
9719 if (lflag)
9720 return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8));
9721 else
9722 return al->linkage;
9723 }
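/* For illustration: a reference to FOO from the function numbered 5 uses
   the linkage symbol "$5..FOO..lk"; with LFLAG set the caller gets a MEM
   at that symbol plus 8 (the second quadword of the linkage pair emitted
   by alpha_write_one_linkage below), otherwise the symbol itself.  */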
9724
9725 static int
9726 alpha_write_one_linkage (splay_tree_node node, void *data)
9727 {
9728 const char *const name = (const char *) node->key;
9729 struct alpha_links *link = (struct alpha_links *) node->value;
9730 FILE *stream = (FILE *) data;
9731
9732 ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0));
9733 if (link->rkind == KIND_CODEADDR)
9734 {
9735 /* External and used, request code address. */
9736 fprintf (stream, "\t.code_address ");
9737 }
9738 else
9739 {
9740 if (!SYMBOL_REF_EXTERNAL_P (link->func)
9741 && SYMBOL_REF_LOCAL_P (link->func))
9742 {
9743 /* Locally defined, build linkage pair. */
9744 fprintf (stream, "\t.quad %s..en\n", name);
9745 fprintf (stream, "\t.quad ");
9746 }
9747 else
9748 {
9749 /* External, request linkage pair. */
9750 fprintf (stream, "\t.linkage ");
9751 }
9752 }
9753 assemble_name (stream, name);
9754 fputs ("\n", stream);
9755
9756 return 0;
9757 }
9758
9759 static void
9760 alpha_write_linkage (FILE *stream, const char *funname)
9761 {
9762 fprintf (stream, "\t.link\n");
9763 fprintf (stream, "\t.align 3\n");
9764 in_section = NULL;
9765
9766 #ifdef TARGET_VMS_CRASH_DEBUG
9767 fputs ("\t.name ", stream);
9768 assemble_name (stream, funname);
9769 fputs ("..na\n", stream);
9770 #endif
9771
9772 ASM_OUTPUT_LABEL (stream, funname);
9773 fprintf (stream, "\t.pdesc ");
9774 assemble_name (stream, funname);
9775 fprintf (stream, "..en,%s\n",
9776 alpha_procedure_type == PT_STACK ? "stack"
9777 : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9778
9779 if (cfun->machine->links)
9780 {
9781 splay_tree_foreach (cfun->machine->links, alpha_write_one_linkage, stream);
9782 /* splay_tree_delete (func->links); */
9783 }
9784 }
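/* For illustration only, a hedged reconstruction from the fprintf calls
   above of what the linkage section for a hypothetical PT_STACK function
   BAR that calls a locally defined FOO might look like (label syntax is
   schematic, the sequence number 7 is made up, and the optional ".name"
   line emitted under TARGET_VMS_CRASH_DEBUG is omitted):

	.link
	.align 3
   bar:
	.pdesc bar..en,stack
   $7..foo..lk:
	.quad foo..en
	.quad foo

   An external callee would instead get a single ".linkage foo" line, and
   a code-address-only reference a ".code_address foo" line.  */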
9785
9786 /* Switch to an arbitrary section NAME with attributes as specified
9787 by FLAGS. DECL is the declaration associated with the section, if
9788 any, and is not used here. */
9789
9790 static void
9791 vms_asm_named_section (const char *name, unsigned int flags,
9792 tree decl ATTRIBUTE_UNUSED)
9793 {
9794 fputc ('\n', asm_out_file);
9795 fprintf (asm_out_file, ".section\t%s", name);
9796
9797 if (flags & SECTION_DEBUG)
9798 fprintf (asm_out_file, ",NOWRT");
9799
9800 fputc ('\n', asm_out_file);
9801 }
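/* For illustration (hedged): with SECTION_DEBUG set, a section named
   DEBUG_INFO (a hypothetical name) comes out of the hook above as

   .section	DEBUG_INFO,NOWRT

   while non-debug sections get no ",NOWRT" attribute.  */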
9802
9803 /* Record an element in the table of global constructors. SYMBOL is
9804 a SYMBOL_REF of the function to be called; PRIORITY is a number
9805 between 0 and MAX_INIT_PRIORITY.
9806
9807 Differs from default_ctors_section_asm_out_constructor in that the
9808 width of the .ctors entry is always 64 bits, rather than the 32 bits
9809 used by a normal pointer. */
9810
9811 static void
9812 vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9813 {
9814 switch_to_section (ctors_section);
9815 assemble_align (BITS_PER_WORD);
9816 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9817 }
9818
9819 static void
9820 vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9821 {
9822 switch_to_section (dtors_section);
9823 assemble_align (BITS_PER_WORD);
9824 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9825 }
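/* For illustration (hedged): UNITS_PER_WORD is 8 on Alpha, so each
   assemble_integer call above emits a 64-bit entry, roughly

	.quad my_ctor

   per constructor or destructor, instead of the 32-bit entry that would
   match a normal (32-bit) VMS pointer; "my_ctor" is a hypothetical
   function name.  */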
9826 #else
9827 rtx
9828 alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
9829 bool lflag ATTRIBUTE_UNUSED,
9830 bool rflag ATTRIBUTE_UNUSED)
9831 {
9832 return NULL_RTX;
9833 }
9834
9835 #endif /* TARGET_ABI_OPEN_VMS */
9836 \f
9837 static void
9838 alpha_init_libfuncs (void)
9839 {
9840 if (TARGET_ABI_OPEN_VMS)
9841 {
9842 /* Use the VMS runtime library functions for division and
9843 remainder. */
9844 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9845 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9846 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9847 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9848 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9849 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9850 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9851 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9852 abort_libfunc = init_one_libfunc ("decc$abort");
9853 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
9854 #ifdef MEM_LIBFUNCS_INIT
9855 MEM_LIBFUNCS_INIT;
9856 #endif
9857 }
9858 }
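/* A minimal standalone sketch (not part of the build): Alpha has no
   integer divide instruction, so source such as the function below is
   compiled to a library call, and the table above routes that call to
   the VMS routine OTS$DIV_I rather than the default division helper.  */
#if 0
int
quotient (int a, int b)
{
  return a / b;		/* becomes a call to OTS$DIV_I on VMS */
}
#endif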
9859
9860 /* On the Alpha, we use this to disable the floating-point registers
9861 when they don't exist. */
9862
9863 static void
9864 alpha_conditional_register_usage (void)
9865 {
9866 int i;
9867 if (! TARGET_FPREGS)
9868 for (i = 32; i < 63; i++)
9869 fixed_regs[i] = call_used_regs[i] = 1;
9870 }
9871
9872 /* Canonicalize a comparison from one we don't have to one we do have. */
9873
9874 static void
9875 alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
9876 bool op0_preserve_value)
9877 {
9878 if (!op0_preserve_value
9879 && (*code == GE || *code == GT || *code == GEU || *code == GTU)
9880 && (REG_P (*op1) || *op1 == const0_rtx))
9881 {
9882 rtx tem = *op0;
9883 *op0 = *op1;
9884 *op1 = tem;
9885 *code = (int)swap_condition ((enum rtx_code)*code);
9886 }
9887
9888 if ((*code == LT || *code == LTU)
9889 && CONST_INT_P (*op1) && INTVAL (*op1) == 256)
9890 {
9891 *code = *code == LT ? LE : LEU;
9892 *op1 = GEN_INT (255);
9893 }
9894 }
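/* A minimal standalone sketch (not part of the build) checking, with
   plain C comparisons on hypothetical operands, that the two rewrites
   above preserve meaning: operand swapping with condition reversal, and
   "LT(U) x, 256" becoming "LE(U) x, 255" (255, unlike 256, fits the
   8-bit literal field of an Alpha compare instruction; an assumption
   about the motivation, not something the code above states).  */
#if 0
#include <assert.h>
int
main (void)
{
  unsigned int x = 200, a = 3, b = 7;
  assert ((a > b) == (b < a));		/* GT a,b   ->  LT b,a    */
  assert ((x < 256u) == (x <= 255u));	/* LTU 256  ->  LEU 255   */
  return 0;
}
#endif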
9895
9896 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
9897
9898 static void
9899 alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
9900 {
9901 const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
9902
9903 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
9904 tree new_fenv_var, reload_fenv, restore_fnenv;
9905 tree update_call, atomic_feraiseexcept, hold_fnclex;
9906
9907 /* Assume OSF/1 compatible interfaces. */
9908 if (!TARGET_ABI_OSF)
9909 return;
9910
9911 /* Generate the equivalent of:
9912 unsigned long fenv_var;
9913 fenv_var = __ieee_get_fp_control ();
9914
9915 unsigned long masked_fenv;
9916 masked_fenv = fenv_var & mask;
9917
9918 __ieee_set_fp_control (masked_fenv); */
9919
9920 fenv_var = create_tmp_var (long_unsigned_type_node, NULL);
9921 get_fpscr
9922 = build_fn_decl ("__ieee_get_fp_control",
9923 build_function_type_list (long_unsigned_type_node, NULL));
9924 set_fpscr
9925 = build_fn_decl ("__ieee_set_fp_control",
9926 build_function_type_list (void_type_node, NULL));
9927 mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
9928 ld_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node,
9929 fenv_var, build_call_expr (get_fpscr, 0));
9930 masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
9931 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
9932 *hold = build2 (COMPOUND_EXPR, void_type_node,
9933 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
9934 hold_fnclex);
9935
9936 /* Store the value of masked_fenv to clear the exceptions:
9937 __ieee_set_fp_control (masked_fenv); */
9938
9939 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
9940
9941 /* Generate the equivalent of:
9942 unsigned long new_fenv_var;
9943 new_fenv_var = __ieee_get_fp_control ();
9944
9945 __ieee_set_fp_control (fenv_var);
9946
9947 __atomic_feraiseexcept (new_fenv_var); */
9948
9949 new_fenv_var = create_tmp_var (long_unsigned_type_node, NULL);
9950 reload_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node, new_fenv_var,
9951 build_call_expr (get_fpscr, 0));
9952 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
9953 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
9954 update_call
9955 = build_call_expr (atomic_feraiseexcept, 1,
9956 fold_convert (integer_type_node, new_fenv_var));
9957 *update = build2 (COMPOUND_EXPR, void_type_node,
9958 build2 (COMPOUND_EXPR, void_type_node,
9959 reload_fenv, restore_fnenv), update_call);
9960 }
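/* A minimal standalone sketch (not part of the build) of the kind of
   source that exercises this hook: a C11 atomic compound assignment on
   a floating-point type, around whose compare-and-swap retry loop the
   hold/clear/update sequences built above are wrapped.  */
#if 0
#include <stdatomic.h>
_Atomic double total;

void
add_sample (double x)
{
  total += x;	/* expanded using the hold/clear/update trees above */
}
#endif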
9961 \f
9962 /* Initialize the GCC target structure. */
9963 #if TARGET_ABI_OPEN_VMS
9964 # undef TARGET_ATTRIBUTE_TABLE
9965 # define TARGET_ATTRIBUTE_TABLE vms_attribute_table
9966 # undef TARGET_CAN_ELIMINATE
9967 # define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
9968 #endif
9969
9970 #undef TARGET_IN_SMALL_DATA_P
9971 #define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
9972
9973 #undef TARGET_ASM_ALIGNED_HI_OP
9974 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
9975 #undef TARGET_ASM_ALIGNED_DI_OP
9976 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
9977
9978 /* Default unaligned ops are provided for ELF systems. To get unaligned
9979 data for non-ELF systems, we have to turn off auto alignment. */
9980 #if TARGET_ABI_OPEN_VMS
9981 #undef TARGET_ASM_UNALIGNED_HI_OP
9982 #define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
9983 #undef TARGET_ASM_UNALIGNED_SI_OP
9984 #define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
9985 #undef TARGET_ASM_UNALIGNED_DI_OP
9986 #define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
9987 #endif
9988
9989 #undef TARGET_ASM_RELOC_RW_MASK
9990 #define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask
9991 #undef TARGET_ASM_SELECT_RTX_SECTION
9992 #define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section
9993 #undef TARGET_SECTION_TYPE_FLAGS
9994 #define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags
9995
9996 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
9997 #define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
9998
9999 #undef TARGET_INIT_LIBFUNCS
10000 #define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
10001
10002 #undef TARGET_LEGITIMIZE_ADDRESS
10003 #define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
10004 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
10005 #define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p
10006
10007 #undef TARGET_ASM_FILE_START
10008 #define TARGET_ASM_FILE_START alpha_file_start
10009
10010 #undef TARGET_SCHED_ADJUST_COST
10011 #define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
10012 #undef TARGET_SCHED_ISSUE_RATE
10013 #define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
10014 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
10015 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
10016 alpha_multipass_dfa_lookahead
10017
10018 #undef TARGET_HAVE_TLS
10019 #define TARGET_HAVE_TLS HAVE_AS_TLS
10020
10021 #undef TARGET_BUILTIN_DECL
10022 #define TARGET_BUILTIN_DECL alpha_builtin_decl
10023 #undef TARGET_INIT_BUILTINS
10024 #define TARGET_INIT_BUILTINS alpha_init_builtins
10025 #undef TARGET_EXPAND_BUILTIN
10026 #define TARGET_EXPAND_BUILTIN alpha_expand_builtin
10027 #undef TARGET_FOLD_BUILTIN
10028 #define TARGET_FOLD_BUILTIN alpha_fold_builtin
10029 #undef TARGET_GIMPLE_FOLD_BUILTIN
10030 #define TARGET_GIMPLE_FOLD_BUILTIN alpha_gimple_fold_builtin
10031
10032 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
10033 #define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
10034 #undef TARGET_CANNOT_COPY_INSN_P
10035 #define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
10036 #undef TARGET_LEGITIMATE_CONSTANT_P
10037 #define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
10038 #undef TARGET_CANNOT_FORCE_CONST_MEM
10039 #define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
10040
10041 #if TARGET_ABI_OSF
10042 #undef TARGET_ASM_OUTPUT_MI_THUNK
10043 #define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
10044 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10045 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
10046 #undef TARGET_STDARG_OPTIMIZE_HOOK
10047 #define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
10048 #endif
10049
10050 /* Use 16-bit anchors. */
10051 #undef TARGET_MIN_ANCHOR_OFFSET
10052 #define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1
10053 #undef TARGET_MAX_ANCHOR_OFFSET
10054 #define TARGET_MAX_ANCHOR_OFFSET 0x7fff
10055 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10056 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
10057
10058 #undef TARGET_RTX_COSTS
10059 #define TARGET_RTX_COSTS alpha_rtx_costs
10060 #undef TARGET_ADDRESS_COST
10061 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
10062
10063 #undef TARGET_MACHINE_DEPENDENT_REORG
10064 #define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
10065
10066 #undef TARGET_PROMOTE_FUNCTION_MODE
10067 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
10068 #undef TARGET_PROMOTE_PROTOTYPES
10069 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
10070 #undef TARGET_RETURN_IN_MEMORY
10071 #define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
10072 #undef TARGET_PASS_BY_REFERENCE
10073 #define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
10074 #undef TARGET_SETUP_INCOMING_VARARGS
10075 #define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
10076 #undef TARGET_STRICT_ARGUMENT_NAMING
10077 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10078 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
10079 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
10080 #undef TARGET_SPLIT_COMPLEX_ARG
10081 #define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
10082 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
10083 #define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
10084 #undef TARGET_ARG_PARTIAL_BYTES
10085 #define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
10086 #undef TARGET_FUNCTION_ARG
10087 #define TARGET_FUNCTION_ARG alpha_function_arg
10088 #undef TARGET_FUNCTION_ARG_ADVANCE
10089 #define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
10090 #undef TARGET_TRAMPOLINE_INIT
10091 #define TARGET_TRAMPOLINE_INIT alpha_trampoline_init
10092
10093 #undef TARGET_INSTANTIATE_DECLS
10094 #define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls
10095
10096 #undef TARGET_SECONDARY_RELOAD
10097 #define TARGET_SECONDARY_RELOAD alpha_secondary_reload
10098
10099 #undef TARGET_SCALAR_MODE_SUPPORTED_P
10100 #define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
10101 #undef TARGET_VECTOR_MODE_SUPPORTED_P
10102 #define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
10103
10104 #undef TARGET_BUILD_BUILTIN_VA_LIST
10105 #define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
10106
10107 #undef TARGET_EXPAND_BUILTIN_VA_START
10108 #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
10109
10110 /* The Alpha architecture does not require sequential consistency. See
10111 http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html
10112 for an example of how it can be violated in practice. */
10113 #undef TARGET_RELAXED_ORDERING
10114 #define TARGET_RELAXED_ORDERING true
10115
10116 #undef TARGET_OPTION_OVERRIDE
10117 #define TARGET_OPTION_OVERRIDE alpha_option_override
10118
10119 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
10120 #undef TARGET_MANGLE_TYPE
10121 #define TARGET_MANGLE_TYPE alpha_mangle_type
10122 #endif
10123
10124 #undef TARGET_LEGITIMATE_ADDRESS_P
10125 #define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p
10126
10127 #undef TARGET_CONDITIONAL_REGISTER_USAGE
10128 #define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage
10129
10130 #undef TARGET_CANONICALIZE_COMPARISON
10131 #define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
10132
10133 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10134 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
10135
10136 struct gcc_target targetm = TARGET_INITIALIZER;
10137
10138 \f
10139 #include "gt-alpha.h"