/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2020 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif

/* Support targetm.vectorize.builtin_mask_for_load.  */
tree altivec_builtin_mask_for_load;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
bool rs6000_passes_ieee128 = false;
#endif

/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in the
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV = 0x001,       /* Use divide estimate.  */
  RECIP_DF_DIV = 0x002,
  RECIP_V4SF_DIV = 0x004,
  RECIP_V2DF_DIV = 0x008,

  RECIP_SF_RSQRT = 0x010,     /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE = 0,
  RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
	       | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
	       | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;         /* option name.  */
  unsigned int mask;          /* mask bits to set.  */
} recip_options[] = {
  { "all", RECIP_ALL },
  { "none", RECIP_NONE },
  { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
	    | RECIP_V2DF_DIV) },
  { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
	      | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
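
/* Worked example (added for illustration, not from the original source):
   -mrecip=divf selects the "divf" entry above, so rs6000_recip_bits is
   seeded from RECIP_SF_DIV | RECIP_V4SF_DIV and only SFmode and V4SFmode
   divisions are expanded with the reciprocal estimate (fre) sequence.  */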

/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,            /* default clone.  */
  CLONE_ISA_2_05,               /* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,               /* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,               /* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,               /* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;       /* rs6000_isa mask.  */
  const char *name;             /* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0, "" },                                    /* Default options.  */
  { OPTION_MASK_CMPB, "arch_2_05" },            /* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD, "arch_2_06" },         /* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR, "arch_2_07" },       /* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR, "arch_3_00" },       /* ISA 3.00 (power9).  */
};
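
/* Illustrative sketch (an assumption about how the table is consumed, not
   part of the original source): a target_clones dispatcher can walk the map
   from the highest entry down and pick the first name the runtime reports:

     if (__builtin_cpu_supports ("arch_3_00"))
       use the CLONE_ISA_3_00 (power9) clone;
     else if (__builtin_cpu_supports ("arch_2_07"))
       use the CLONE_ISA_2_07 (power8) clone;
     ...
     else
       use the CLONE_DEFAULT clone;

   which is why the clones above are ordered from the default up to the
   highest ISA.  */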


/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
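
/* For example, IS_STD_REG_TYPE (ALTIVEC_REG_TYPE) is true because
   ALTIVEC_REG_TYPE lies between GPR_REG_TYPE and FPR_REG_TYPE in the
   enumeration above, while IS_FP_VECT_REG_TYPE (GPR_REG_TYPE) is false
   because GPR_REG_TYPE precedes VSX_REG_TYPE.  */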


/* Register classes we care about in secondary reload or in legitimate
   address support.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,               /* General purpose registers.  */
  RELOAD_REG_FPR,               /* Traditional floating point regs.  */
  RELOAD_REG_VMX,               /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,               /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;             /* Register class name.  */
  int reg;                      /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },           /* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },           /* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },       /* RELOAD_REG_VMX.  */
  { "Any", -1 },                        /* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID        0x01    /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE     0x02    /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED      0x04    /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET       0x08    /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC   0x10    /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY   0x20    /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16      0x40    /* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET  0x80    /* Quad offset is limited.  */
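
/* Illustrative example (not from the original source): an addr_mask value
   of (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET) describes
   a mode that is valid in the register class and accepts both reg+reg and
   reg+offset addresses, but none of the auto-update
   (PRE_INC/PRE_DEC/PRE_MODIFY) forms.  */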

/* Masks of the valid addressing modes, based on register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;           /* INSN to reload for loading.  */
  enum insn_code reload_store;          /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;        /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;        /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;        /* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;                 /* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
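
/* Illustrative note (an assumption about the underlying ISA detail): the
   DQ-form restriction recorded by RELOAD_REG_QUAD_OFFSET matches
   instructions such as the ISA 3.0 lxv/stxv, whose displacement is scaled
   so that the effective offset must be a multiple of 16.  */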

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
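
/* Illustrative use (an assumption about how this predicate is consumed,
   based on the comment above): the machine description can name it as the
   guard of a bypass, e.g.

     (define_bypass 2 "producer-insn" "consumer-insn"
       "rs6000_store_data_bypass_p")

   ("producer-insn"/"consumer-insn" are placeholder reservation names), so
   the latency override only applies when the produced value feeds the
   stored data rather than the store address.  */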

\f
/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;
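
/* Illustrative note (not from the original source): entries in the tables
   below are scaled by COSTS_N_INSNS, i.e. expressed relative to the cost of
   an add, so e.g. COSTS_N_INSNS (18) for divsi in power7_cost says an
   SImode divide costs roughly eighteen adds.  */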

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
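
/* Illustrative note (not from the original source): the hash table above
   deduplicates TOC entries, so two references to the same constant hash to
   the same {key, key_mode} pair and share one labelno rather than emitting
   two TOC slots.  */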


\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec", 1, 1, false, true, false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall", 0, 0, false, true, true, false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true, true, false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
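
/* Illustrative example (an assumption about the internal attribute
   spelling): the "altivec" entry above is the hook behind the vector
   keyword, roughly as if the user had written

     typedef float vf4 __attribute__ ((altivec (vector__)));

   which rs6000_handle_altivec_attribute lowers to a V4SF vector type.  */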
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

     addis tmp,anchor,high
     add dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1674 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1675 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1676 #undef TARGET_UNWIND_WORD_MODE
1677 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1678
1679 #undef TARGET_OFFLOAD_OPTIONS
1680 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1681
1682 #undef TARGET_C_MODE_FOR_SUFFIX
1683 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1684
1685 #undef TARGET_INVALID_BINARY_OP
1686 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1687
1688 #undef TARGET_OPTAB_SUPPORTED_P
1689 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1690
1691 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1692 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1693
1694 #undef TARGET_COMPARE_VERSION_PRIORITY
1695 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1696
1697 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1698 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1699 rs6000_generate_version_dispatcher_body
1700
1701 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1702 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1703 rs6000_get_function_versions_dispatcher
1704
1705 #undef TARGET_OPTION_FUNCTION_VERSIONS
1706 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1707
1708 #undef TARGET_HARD_REGNO_NREGS
1709 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1710 #undef TARGET_HARD_REGNO_MODE_OK
1711 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1712
1713 #undef TARGET_MODES_TIEABLE_P
1714 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1715
1716 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1717 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1718 rs6000_hard_regno_call_part_clobbered
1719
1720 #undef TARGET_SLOW_UNALIGNED_ACCESS
1721 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1722
1723 #undef TARGET_CAN_CHANGE_MODE_CLASS
1724 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1725
1726 #undef TARGET_CONSTANT_ALIGNMENT
1727 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1728
1729 #undef TARGET_STARTING_FRAME_OFFSET
1730 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1731
1732 #if TARGET_ELF && RS6000_WEAK
1733 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1734 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1735 #endif
1736
1737 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1738 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1739
1740 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1741 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1742
1743 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1744 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1745 rs6000_cannot_substitute_mem_equiv_p
1746 \f
1747
1748 /* Processor table. */
1749 struct rs6000_ptt
1750 {
1751 const char *const name; /* Canonical processor name. */
1752 const enum processor_type processor; /* Processor type enum value. */
1753 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1754 };
1755
1756 static struct rs6000_ptt const processor_target_table[] =
1757 {
1758 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1759 #include "rs6000-cpus.def"
1760 #undef RS6000_CPU
1761 };
1762
1763 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1764 name is invalid. */
1765
1766 static int
1767 rs6000_cpu_name_lookup (const char *name)
1768 {
1769 size_t i;
1770
1771 if (name != NULL)
1772 {
1773 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1774 if (! strcmp (name, processor_target_table[i].name))
1775 return (int)i;
1776 }
1777
1778 return -1;
1779 }
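
/* For example, with the table filled in from rs6000-cpus.def, a call such as
   rs6000_cpu_name_lookup ("power8") returns the index of the matching table
   entry, while a name with no entry (say, "power0") returns -1.  The exact
   set of valid names is whatever rs6000-cpus.def provides.  */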
1780
1781 \f
1782 /* Return number of consecutive hard regs needed starting at reg REGNO
1783 to hold something of mode MODE.
1784 This is ordinarily the length in words of a value of mode MODE
1785 but can be less for certain modes in special long registers.
1786
1787 POWER and PowerPC GPRs hold 32 bits worth;
1788 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1789
1790 static int
1791 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1792 {
1793 unsigned HOST_WIDE_INT reg_size;
1794
1795 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1796 128-bit floating point that can go in vector registers, which has VSX
1797 memory addressing. */
1798 if (FP_REGNO_P (regno))
1799 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1800 ? UNITS_PER_VSX_WORD
1801 : UNITS_PER_FP_WORD);
1802
1803 else if (ALTIVEC_REGNO_P (regno))
1804 reg_size = UNITS_PER_ALTIVEC_WORD;
1805
1806 else
1807 reg_size = UNITS_PER_WORD;
1808
1809 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1810 }
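
/* Worked example: on 32-bit PowerPC, UNITS_PER_WORD is 4, so a DImode value
   (8 bytes) in a GPR needs (8 + 4 - 1) / 4 = 2 consecutive registers, while
   the same value in an FPR (UNITS_PER_FP_WORD == 8) needs
   (8 + 8 - 1) / 8 = 1 register.  */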
1811
1812 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1813 MODE. */
1814 static int
1815 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1816 {
1817 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1818
1819 if (COMPLEX_MODE_P (mode))
1820 mode = GET_MODE_INNER (mode);
1821
1822 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1823 register pairs, and we use PTImode where we need to deal with quad
1824 word memory operations. Don't allow quad words in the argument or frame
1825 pointer registers, just registers 0..31. */
1826 if (mode == PTImode)
1827 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1828 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1829 && ((regno & 1) == 0));
1830
1831 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1832 implementations. Don't allow an item to be split between a FP register
1833 and an Altivec register. Allow TImode in all VSX registers if the user
1834 asked for it. */
1835 if (TARGET_VSX && VSX_REGNO_P (regno)
1836 && (VECTOR_MEM_VSX_P (mode)
1837 || FLOAT128_VECTOR_P (mode)
1838 || reg_addr[mode].scalar_in_vmx_p
1839 || mode == TImode
1840 || (TARGET_VADDUQM && mode == V1TImode)))
1841 {
1842 if (FP_REGNO_P (regno))
1843 return FP_REGNO_P (last_regno);
1844
1845 if (ALTIVEC_REGNO_P (regno))
1846 {
1847 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1848 return 0;
1849
1850 return ALTIVEC_REGNO_P (last_regno);
1851 }
1852 }
1853
1854 /* The GPRs can hold any mode, but values bigger than one register
1855 cannot go past R31. */
1856 if (INT_REGNO_P (regno))
1857 return INT_REGNO_P (last_regno);
1858
1859 /* The float registers (except for VSX vector modes) can only hold floating
1860 modes and DImode. */
1861 if (FP_REGNO_P (regno))
1862 {
1863 if (FLOAT128_VECTOR_P (mode))
1864 return false;
1865
1866 if (SCALAR_FLOAT_MODE_P (mode)
1867 && (mode != TDmode || (regno % 2) == 0)
1868 && FP_REGNO_P (last_regno))
1869 return 1;
1870
1871 if (GET_MODE_CLASS (mode) == MODE_INT)
1872 {
1873 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1874 return 1;
1875
1876 if (TARGET_P8_VECTOR && (mode == SImode))
1877 return 1;
1878
1879 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1880 return 1;
1881 }
1882
1883 return 0;
1884 }
1885
1886 /* The CR register can only hold CC modes. */
1887 if (CR_REGNO_P (regno))
1888 return GET_MODE_CLASS (mode) == MODE_CC;
1889
1890 if (CA_REGNO_P (regno))
1891 return mode == Pmode || mode == SImode;
1892
1893 /* AltiVec modes only in AltiVec registers. */
1894 if (ALTIVEC_REGNO_P (regno))
1895 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1896 || mode == V1TImode);
1897
1898 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1899 registers, and the value must be able to fit within the register set. */
1900
1901 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1902 }
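
/* For example, under the PTImode rule above a quad word may start at GPR 4
   (occupying registers 4 and 5) but not at GPR 5, because quad word memory
   operations need an even/odd register pair.  */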
1903
1904 /* Implement TARGET_HARD_REGNO_NREGS. */
1905
1906 static unsigned int
1907 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1908 {
1909 return rs6000_hard_regno_nregs[mode][regno];
1910 }
1911
1912 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1913
1914 static bool
1915 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1916 {
1917 return rs6000_hard_regno_mode_ok_p[mode][regno];
1918 }
1919
1920 /* Implement TARGET_MODES_TIEABLE_P.
1921
1922 PTImode cannot tie with other modes because PTImode is restricted to even
1923 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1924 57744).
1925
1926 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1927 128-bit floating point on VSX systems ties with other vectors. */
1928
1929 static bool
1930 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1931 {
1932 if (mode1 == PTImode)
1933 return mode2 == PTImode;
1934 if (mode2 == PTImode)
1935 return false;
1936
1937 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1938 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1939 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1940 return false;
1941
1942 if (SCALAR_FLOAT_MODE_P (mode1))
1943 return SCALAR_FLOAT_MODE_P (mode2);
1944 if (SCALAR_FLOAT_MODE_P (mode2))
1945 return false;
1946
1947 if (GET_MODE_CLASS (mode1) == MODE_CC)
1948 return GET_MODE_CLASS (mode2) == MODE_CC;
1949 if (GET_MODE_CLASS (mode2) == MODE_CC)
1950 return false;
1951
1952 return true;
1953 }
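
/* For example, V2DFmode ties with V4SFmode (both are Altivec/VSX vector
   modes) and SFmode ties with DFmode (both scalar float), but SFmode does
   not tie with SImode: once only one of the two modes is a scalar float
   mode, the second test in that pair returns false.  */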
1954
1955 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1956
1957 static bool
1958 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1959 machine_mode mode)
1960 {
1961 if (TARGET_32BIT
1962 && TARGET_POWERPC64
1963 && GET_MODE_SIZE (mode) > 4
1964 && INT_REGNO_P (regno))
1965 return true;
1966
1967 if (TARGET_VSX
1968 && FP_REGNO_P (regno)
1969 && GET_MODE_SIZE (mode) > 8
1970 && !FLOAT128_2REG_P (mode))
1971 return true;
1972
1973 return false;
1974 }
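
/* For instance, with -m32 -mpowerpc64 a DImode value occupies a single
   64-bit GPR, but the 32-bit ABI only preserves the low 32 bits of the
   nonvolatile GPRs across calls, so the first test above reports the
   register as partly clobbered.  */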
1975
1976 /* Print interesting facts about registers. */
1977 static void
1978 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1979 {
1980 int r, m;
1981
1982 for (r = first_regno; r <= last_regno; ++r)
1983 {
1984 const char *comma = "";
1985 int len;
1986
1987 if (first_regno == last_regno)
1988 fprintf (stderr, "%s:\t", reg_name);
1989 else
1990 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1991
1992 len = 8;
1993 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1994 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1995 {
1996 if (len > 70)
1997 {
1998 fprintf (stderr, ",\n\t");
1999 len = 8;
2000 comma = "";
2001 }
2002
2003 if (rs6000_hard_regno_nregs[m][r] > 1)
2004 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2005 rs6000_hard_regno_nregs[m][r]);
2006 else
2007 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2008
2009 comma = ", ";
2010 }
2011
2012 if (call_used_or_fixed_reg_p (r))
2013 {
2014 if (len > 70)
2015 {
2016 fprintf (stderr, ",\n\t");
2017 len = 8;
2018 comma = "";
2019 }
2020
2021 len += fprintf (stderr, "%s%s", comma, "call-used");
2022 comma = ", ";
2023 }
2024
2025 if (fixed_regs[r])
2026 {
2027 if (len > 70)
2028 {
2029 fprintf (stderr, ",\n\t");
2030 len = 8;
2031 comma = "";
2032 }
2033
2034 len += fprintf (stderr, "%s%s", comma, "fixed");
2035 comma = ", ";
2036 }
2037
2038 if (len > 70)
2039 {
2040 fprintf (stderr, ",\n\t");
2041 comma = "";
2042 }
2043
2044 len += fprintf (stderr, "%sreg-class = %s", comma,
2045 reg_class_names[(int)rs6000_regno_regclass[r]]);
2046 comma = ", ";
2047
2048 if (len > 70)
2049 {
2050 fprintf (stderr, ",\n\t");
2051 comma = "";
2052 }
2053
2054 fprintf (stderr, "%sregno = %d\n", comma, r);
2055 }
2056 }
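
/* With -mdebug=reg the output has the general shape (illustrative only):

       gr0:    SI, DI/2, ..., call-used, reg-class = GENERAL_REGS, regno = 0

   where a "/2" suffix marks modes that need two consecutive registers.  */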
2057
2058 static const char *
2059 rs6000_debug_vector_unit (enum rs6000_vector v)
2060 {
2061 const char *ret;
2062
2063 switch (v)
2064 {
2065 case VECTOR_NONE: ret = "none"; break;
2066 case VECTOR_ALTIVEC: ret = "altivec"; break;
2067 case VECTOR_VSX: ret = "vsx"; break;
2068 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2069 default: ret = "unknown"; break;
2070 }
2071
2072 return ret;
2073 }
2074
2075 /* Inner function printing just the address mask for a particular reload
2076 register class. */
2077 DEBUG_FUNCTION char *
2078 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2079 {
2080 static char ret[8];
2081 char *p = ret;
2082
2083 if ((mask & RELOAD_REG_VALID) != 0)
2084 *p++ = 'v';
2085 else if (keep_spaces)
2086 *p++ = ' ';
2087
2088 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2089 *p++ = 'm';
2090 else if (keep_spaces)
2091 *p++ = ' ';
2092
2093 if ((mask & RELOAD_REG_INDEXED) != 0)
2094 *p++ = 'i';
2095 else if (keep_spaces)
2096 *p++ = ' ';
2097
2098 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2099 *p++ = 'O';
2100 else if ((mask & RELOAD_REG_OFFSET) != 0)
2101 *p++ = 'o';
2102 else if (keep_spaces)
2103 *p++ = ' ';
2104
2105 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2106 *p++ = '+';
2107 else if (keep_spaces)
2108 *p++ = ' ';
2109
2110 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2111 *p++ = '+';
2112 else if (keep_spaces)
2113 *p++ = ' ';
2114
2115 if ((mask & RELOAD_REG_AND_M16) != 0)
2116 *p++ = '&';
2117 else if (keep_spaces)
2118 *p++ = ' ';
2119
2120 *p = '\0';
2121
2122 return ret;
2123 }
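
/* Example: with KEEP_SPACES set, a mask of RELOAD_REG_VALID
   | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET comes back as "v io   ", i.e.
   valid, indexed, and offsettable, with the unused positions left blank.  */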
2124
2125 /* Print the address masks in a human readable fashion. */
2126 DEBUG_FUNCTION void
2127 rs6000_debug_print_mode (ssize_t m)
2128 {
2129 ssize_t rc;
2130 int spaces = 0;
2131
2132 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2133 for (rc = 0; rc < N_RELOAD_REG; rc++)
2134 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2135 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2136
2137 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2138 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2139 {
2140 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2141 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2142 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2143 spaces = 0;
2144 }
2145 else
2146 spaces += strlen (" Reload=sl");
2147
2148 if (reg_addr[m].scalar_in_vmx_p)
2149 {
2150 fprintf (stderr, "%*s Upper=y", spaces, "");
2151 spaces = 0;
2152 }
2153 else
2154 spaces += strlen (" Upper=y");
2155
2156 if (rs6000_vector_unit[m] != VECTOR_NONE
2157 || rs6000_vector_mem[m] != VECTOR_NONE)
2158 {
2159 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2160 spaces, "",
2161 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2162 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2163 }
2164
2165 fputs ("\n", stderr);
2166 }
2167
2168 #define DEBUG_FMT_ID "%-32s= "
2169 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2170 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2171 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2172
2173 /* Print various interesting information with -mdebug=reg. */
2174 static void
2175 rs6000_debug_reg_global (void)
2176 {
2177 static const char *const tf[2] = { "false", "true" };
2178 const char *nl = (const char *)0;
2179 int m;
2180 size_t m1, m2, v;
2181 char costly_num[20];
2182 char nop_num[20];
2183 char flags_buffer[40];
2184 const char *costly_str;
2185 const char *nop_str;
2186 const char *trace_str;
2187 const char *abi_str;
2188 const char *cmodel_str;
2189 struct cl_target_option cl_opts;
2190
2191 /* Modes we want tieable information on. */
2192 static const machine_mode print_tieable_modes[] = {
2193 QImode,
2194 HImode,
2195 SImode,
2196 DImode,
2197 TImode,
2198 PTImode,
2199 SFmode,
2200 DFmode,
2201 TFmode,
2202 IFmode,
2203 KFmode,
2204 SDmode,
2205 DDmode,
2206 TDmode,
2207 V16QImode,
2208 V8HImode,
2209 V4SImode,
2210 V2DImode,
2211 V1TImode,
2212 V32QImode,
2213 V16HImode,
2214 V8SImode,
2215 V4DImode,
2216 V2TImode,
2217 V4SFmode,
2218 V2DFmode,
2219 V8SFmode,
2220 V4DFmode,
2221 CCmode,
2222 CCUNSmode,
2223 CCEQmode,
2224 };
2225
2226 /* Virtual regs we are interested in. */
2227 static const struct {
2228 int regno; /* register number. */
2229 const char *name; /* register name. */
2230 } virtual_regs[] = {
2231 { STACK_POINTER_REGNUM, "stack pointer:" },
2232 { TOC_REGNUM, "toc: " },
2233 { STATIC_CHAIN_REGNUM, "static chain: " },
2234 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2235 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2236 { ARG_POINTER_REGNUM, "arg pointer: " },
2237 { FRAME_POINTER_REGNUM, "frame pointer:" },
2238 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2239 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2240 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2241 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2242 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2243 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2244 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2245 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2246 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2247 };
2248
2249 fputs ("\nHard register information:\n", stderr);
2250 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2251 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2252 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2253 LAST_ALTIVEC_REGNO,
2254 "vs");
2255 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2256 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2257 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2258 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2259 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2260 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2261
2262 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2263 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2264 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2265
2266 fprintf (stderr,
2267 "\n"
2268 "d reg_class = %s\n"
2269 "f reg_class = %s\n"
2270 "v reg_class = %s\n"
2271 "wa reg_class = %s\n"
2272 "we reg_class = %s\n"
2273 "wr reg_class = %s\n"
2274 "wx reg_class = %s\n"
2275 "wA reg_class = %s\n"
2276 "\n",
2277 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2278 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2279 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2280 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2281 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2282 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2283 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2284 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2285
2286 nl = "\n";
2287 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2288 rs6000_debug_print_mode (m);
2289
2290 fputs ("\n", stderr);
2291
2292 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2293 {
2294 machine_mode mode1 = print_tieable_modes[m1];
2295 bool first_time = true;
2296
2297 nl = (const char *)0;
2298 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2299 {
2300 machine_mode mode2 = print_tieable_modes[m2];
2301 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2302 {
2303 if (first_time)
2304 {
2305 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2306 nl = "\n";
2307 first_time = false;
2308 }
2309
2310 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2311 }
2312 }
2313
2314 if (!first_time)
2315 fputs ("\n", stderr);
2316 }
2317
2318 if (nl)
2319 fputs (nl, stderr);
2320
2321 if (rs6000_recip_control)
2322 {
2323 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2324
2325 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2326 if (rs6000_recip_bits[m])
2327 {
2328 fprintf (stderr,
2329 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2330 GET_MODE_NAME (m),
2331 (RS6000_RECIP_AUTO_RE_P (m)
2332 ? "auto"
2333 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2334 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2335 ? "auto"
2336 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2337 }
2338
2339 fputs ("\n", stderr);
2340 }
2341
2342 if (rs6000_cpu_index >= 0)
2343 {
2344 const char *name = processor_target_table[rs6000_cpu_index].name;
2345 HOST_WIDE_INT flags
2346 = processor_target_table[rs6000_cpu_index].target_enable;
2347
2348 sprintf (flags_buffer, "-mcpu=%s flags", name);
2349 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2350 }
2351 else
2352 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2353
2354 if (rs6000_tune_index >= 0)
2355 {
2356 const char *name = processor_target_table[rs6000_tune_index].name;
2357 HOST_WIDE_INT flags
2358 = processor_target_table[rs6000_tune_index].target_enable;
2359
2360 sprintf (flags_buffer, "-mtune=%s flags", name);
2361 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2362 }
2363 else
2364 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2365
2366 cl_target_option_save (&cl_opts, &global_options);
2367 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2368 rs6000_isa_flags);
2369
2370 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2371 rs6000_isa_flags_explicit);
2372
2373 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2374 rs6000_builtin_mask);
2375
2376 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2377
2378 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2379 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2380
2381 switch (rs6000_sched_costly_dep)
2382 {
2383 case max_dep_latency:
2384 costly_str = "max_dep_latency";
2385 break;
2386
2387 case no_dep_costly:
2388 costly_str = "no_dep_costly";
2389 break;
2390
2391 case all_deps_costly:
2392 costly_str = "all_deps_costly";
2393 break;
2394
2395 case true_store_to_load_dep_costly:
2396 costly_str = "true_store_to_load_dep_costly";
2397 break;
2398
2399 case store_to_load_dep_costly:
2400 costly_str = "store_to_load_dep_costly";
2401 break;
2402
2403 default:
2404 costly_str = costly_num;
2405 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2406 break;
2407 }
2408
2409 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2410
2411 switch (rs6000_sched_insert_nops)
2412 {
2413 case sched_finish_regroup_exact:
2414 nop_str = "sched_finish_regroup_exact";
2415 break;
2416
2417 case sched_finish_pad_groups:
2418 nop_str = "sched_finish_pad_groups";
2419 break;
2420
2421 case sched_finish_none:
2422 nop_str = "sched_finish_none";
2423 break;
2424
2425 default:
2426 nop_str = nop_num;
2427 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2428 break;
2429 }
2430
2431 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2432
2433 switch (rs6000_sdata)
2434 {
2435 default:
2436 case SDATA_NONE:
2437 break;
2438
2439 case SDATA_DATA:
2440 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2441 break;
2442
2443 case SDATA_SYSV:
2444 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2445 break;
2446
2447 case SDATA_EABI:
2448 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2449 break;
2450
2451 }
2452
2453 switch (rs6000_traceback)
2454 {
2455 case traceback_default: trace_str = "default"; break;
2456 case traceback_none: trace_str = "none"; break;
2457 case traceback_part: trace_str = "part"; break;
2458 case traceback_full: trace_str = "full"; break;
2459 default: trace_str = "unknown"; break;
2460 }
2461
2462 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2463
2464 switch (rs6000_current_cmodel)
2465 {
2466 case CMODEL_SMALL: cmodel_str = "small"; break;
2467 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2468 case CMODEL_LARGE: cmodel_str = "large"; break;
2469 default: cmodel_str = "unknown"; break;
2470 }
2471
2472 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2473
2474 switch (rs6000_current_abi)
2475 {
2476 case ABI_NONE: abi_str = "none"; break;
2477 case ABI_AIX: abi_str = "aix"; break;
2478 case ABI_ELFv2: abi_str = "ELFv2"; break;
2479 case ABI_V4: abi_str = "V4"; break;
2480 case ABI_DARWIN: abi_str = "darwin"; break;
2481 default: abi_str = "unknown"; break;
2482 }
2483
2484 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2485
2486 if (rs6000_altivec_abi)
2487 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2488
2489 if (rs6000_darwin64_abi)
2490 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2491
2492 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2493 (TARGET_SOFT_FLOAT ? "true" : "false"));
2494
2495 if (TARGET_LINK_STACK)
2496 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2497
2498 if (TARGET_P8_FUSION)
2499 {
2500 char options[80];
2501
2502 strcpy (options, "power8");
2503 if (TARGET_P8_FUSION_SIGN)
2504 strcat (options, ", sign");
2505
2506 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2507 }
2508
2509 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2510 TARGET_SECURE_PLT ? "secure" : "bss");
2511 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2512 aix_struct_return ? "aix" : "sysv");
2513 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2514 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2515 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2516 tf[!!rs6000_align_branch_targets]);
2517 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2518 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2519 rs6000_long_double_type_size);
2520 if (rs6000_long_double_type_size > 64)
2521 {
2522 fprintf (stderr, DEBUG_FMT_S, "long double type",
2523 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2524 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2525 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2526 }
2527 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2528 (int)rs6000_sched_restricted_insns_priority);
2529 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2530 (int)END_BUILTINS);
2531 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2532 (int)RS6000_BUILTIN_COUNT);
2533
2534 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2535 (int)TARGET_FLOAT128_ENABLE_TYPE);
2536
2537 if (TARGET_VSX)
2538 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2539 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2540
2541 if (TARGET_DIRECT_MOVE_128)
2542 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2543 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2544 }
2545
2546 \f
2547 /* Update the addr mask bits in reg_addr to help secondary reload and the
2548 legitimate address support figure out the appropriate addressing to
2549 use. */
2550
2551 static void
2552 rs6000_setup_reg_addr_masks (void)
2553 {
2554 ssize_t rc, reg, m, nregs;
2555 addr_mask_type any_addr_mask, addr_mask;
2556
2557 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2558 {
2559 machine_mode m2 = (machine_mode) m;
2560 bool complex_p = false;
2561 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2562 size_t msize;
2563
2564 if (COMPLEX_MODE_P (m2))
2565 {
2566 complex_p = true;
2567 m2 = GET_MODE_INNER (m2);
2568 }
2569
2570 msize = GET_MODE_SIZE (m2);
2571
2572 /* SDmode is special in that we want to access it only via REG+REG
2573 addressing on power7 and above, since we want to use the LFIWZX and
2574 STFIWZX instructions to load it. */
2575 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2576
2577 any_addr_mask = 0;
2578 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2579 {
2580 addr_mask = 0;
2581 reg = reload_reg_map[rc].reg;
2582
2583 /* Can mode values go in the GPR/FPR/Altivec registers? */
2584 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2585 {
2586 bool small_int_vsx_p = (small_int_p
2587 && (rc == RELOAD_REG_FPR
2588 || rc == RELOAD_REG_VMX));
2589
2590 nregs = rs6000_hard_regno_nregs[m][reg];
2591 addr_mask |= RELOAD_REG_VALID;
2592
2593 /* Indicate if the mode takes more than 1 physical register. If
2594 it takes a single register, indicate it can do REG+REG
2595 addressing. Small integers in VSX registers can only do
2596 REG+REG addressing. */
2597 if (small_int_vsx_p)
2598 addr_mask |= RELOAD_REG_INDEXED;
2599 else if (nregs > 1 || m == BLKmode || complex_p)
2600 addr_mask |= RELOAD_REG_MULTIPLE;
2601 else
2602 addr_mask |= RELOAD_REG_INDEXED;
2603
2604 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2605 addressing. If we allow scalars into Altivec registers,
2606 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2607
2608 For VSX systems, we don't allow update addressing for
2609 DFmode/SFmode if those registers can go in both the
2610 traditional floating point registers and Altivec registers.
2611 The load/store instructions for the Altivec registers do not
2612 have update forms. If we allowed update addressing, it seems
2613 to break IV-OPT code using floating point if the index type is
2614 int instead of long (PR target/81550 and target/84042). */
2615
2616 if (TARGET_UPDATE
2617 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2618 && msize <= 8
2619 && !VECTOR_MODE_P (m2)
2620 && !FLOAT128_VECTOR_P (m2)
2621 && !complex_p
2622 && (m != E_DFmode || !TARGET_VSX)
2623 && (m != E_SFmode || !TARGET_P8_VECTOR)
2624 && !small_int_vsx_p)
2625 {
2626 addr_mask |= RELOAD_REG_PRE_INCDEC;
2627
2628 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2629 we don't allow PRE_MODIFY for some multi-register
2630 operations. */
2631 switch (m)
2632 {
2633 default:
2634 addr_mask |= RELOAD_REG_PRE_MODIFY;
2635 break;
2636
2637 case E_DImode:
2638 if (TARGET_POWERPC64)
2639 addr_mask |= RELOAD_REG_PRE_MODIFY;
2640 break;
2641
2642 case E_DFmode:
2643 case E_DDmode:
2644 if (TARGET_HARD_FLOAT)
2645 addr_mask |= RELOAD_REG_PRE_MODIFY;
2646 break;
2647 }
2648 }
2649 }
2650
2651 /* GPR and FPR registers can do REG+OFFSET addressing, except
2652 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2653 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2654 if ((addr_mask != 0) && !indexed_only_p
2655 && msize <= 8
2656 && (rc == RELOAD_REG_GPR
2657 || ((msize == 8 || m2 == SFmode)
2658 && (rc == RELOAD_REG_FPR
2659 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2660 addr_mask |= RELOAD_REG_OFFSET;
2661
2662 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2663 instructions are enabled. The offset for 128-bit VSX registers is
2664 only 12 bits. While GPRs can handle the full offset range, VSX
2665 registers can only handle the restricted range. */
2666 else if ((addr_mask != 0) && !indexed_only_p
2667 && msize == 16 && TARGET_P9_VECTOR
2668 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2669 || (m2 == TImode && TARGET_VSX)))
2670 {
2671 addr_mask |= RELOAD_REG_OFFSET;
2672 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2673 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2674 }
2675
2676 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2677 addressing on 128-bit types. */
2678 if (rc == RELOAD_REG_VMX && msize == 16
2679 && (addr_mask & RELOAD_REG_VALID) != 0)
2680 addr_mask |= RELOAD_REG_AND_M16;
2681
2682 reg_addr[m].addr_mask[rc] = addr_mask;
2683 any_addr_mask |= addr_mask;
2684 }
2685
2686 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2687 }
2688 }
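
/* As an illustration (the exact bits depend on the -mcpu flags): on a VSX
   target, DFmode in the FPR reload class typically ends up with
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET but without the
   update bits, since PRE_INC/PRE_DEC/PRE_MODIFY are suppressed for DFmode
   when TARGET_VSX is set.  */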
2689
2690 \f
2691 /* Initialize the various global tables that are based on register size. */
2692 static void
2693 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2694 {
2695 ssize_t r, m, c;
2696 int align64;
2697 int align32;
2698
2699 /* Precalculate REGNO_REG_CLASS. */
2700 rs6000_regno_regclass[0] = GENERAL_REGS;
2701 for (r = 1; r < 32; ++r)
2702 rs6000_regno_regclass[r] = BASE_REGS;
2703
2704 for (r = 32; r < 64; ++r)
2705 rs6000_regno_regclass[r] = FLOAT_REGS;
2706
2707 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2708 rs6000_regno_regclass[r] = NO_REGS;
2709
2710 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2711 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2712
2713 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2714 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2715 rs6000_regno_regclass[r] = CR_REGS;
2716
2717 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2718 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2719 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2720 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2721 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2722 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2723 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2724
2725 /* Precalculate register class to simpler reload register class. We don't
2726 need all of the register classes that are combinations of different
2727 classes, just the simple ones that have constraint letters. */
2728 for (c = 0; c < N_REG_CLASSES; c++)
2729 reg_class_to_reg_type[c] = NO_REG_TYPE;
2730
2731 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2732 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2733 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2734 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2735 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2736 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2737 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2738 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2739 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2740 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2741
2742 if (TARGET_VSX)
2743 {
2744 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2745 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2746 }
2747 else
2748 {
2749 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2750 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2751 }
2752
2753 /* Precalculate the valid memory formats as well as the vector information;
2754 this must be set up before the rs6000_hard_regno_nregs_internal calls
2755 below. */
2756 gcc_assert ((int)VECTOR_NONE == 0);
2757 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2758 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2759
2760 gcc_assert ((int)CODE_FOR_nothing == 0);
2761 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2762
2763 gcc_assert ((int)NO_REGS == 0);
2764 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2765
2766 /* The VSX hardware allows native alignment for vectors; control whether the
2767 compiler believes it can use native alignment or must still use 128-bit alignment. */
2768 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2769 {
2770 align64 = 64;
2771 align32 = 32;
2772 }
2773 else
2774 {
2775 align64 = 128;
2776 align32 = 128;
2777 }
2778
2779 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2780 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2781 if (TARGET_FLOAT128_TYPE)
2782 {
2783 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2784 rs6000_vector_align[KFmode] = 128;
2785
2786 if (FLOAT128_IEEE_P (TFmode))
2787 {
2788 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2789 rs6000_vector_align[TFmode] = 128;
2790 }
2791 }
2792
2793 /* V2DF mode, VSX only. */
2794 if (TARGET_VSX)
2795 {
2796 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2797 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2798 rs6000_vector_align[V2DFmode] = align64;
2799 }
2800
2801 /* V4SF mode, either VSX or Altivec. */
2802 if (TARGET_VSX)
2803 {
2804 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2805 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2806 rs6000_vector_align[V4SFmode] = align32;
2807 }
2808 else if (TARGET_ALTIVEC)
2809 {
2810 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2811 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2812 rs6000_vector_align[V4SFmode] = align32;
2813 }
2814
2815 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2816 and stores. */
2817 if (TARGET_ALTIVEC)
2818 {
2819 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2820 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2821 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2822 rs6000_vector_align[V4SImode] = align32;
2823 rs6000_vector_align[V8HImode] = align32;
2824 rs6000_vector_align[V16QImode] = align32;
2825
2826 if (TARGET_VSX)
2827 {
2828 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2829 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2830 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2831 }
2832 else
2833 {
2834 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2835 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2836 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2837 }
2838 }
2839
2840 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector mode. Allow
2841 it under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2842 if (TARGET_VSX)
2843 {
2844 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2845 rs6000_vector_unit[V2DImode]
2846 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2847 rs6000_vector_align[V2DImode] = align64;
2848
2849 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2850 rs6000_vector_unit[V1TImode]
2851 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2852 rs6000_vector_align[V1TImode] = 128;
2853 }
2854
2855 /* DFmode, see if we want to use the VSX unit. Memory is handled
2856 differently, so don't set rs6000_vector_mem. */
2857 if (TARGET_VSX)
2858 {
2859 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2860 rs6000_vector_align[DFmode] = 64;
2861 }
2862
2863 /* SFmode, see if we want to use the VSX unit. */
2864 if (TARGET_P8_VECTOR)
2865 {
2866 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2867 rs6000_vector_align[SFmode] = 32;
2868 }
2869
2870 /* Allow TImode in VSX register and set the VSX memory macros. */
2871 if (TARGET_VSX)
2872 {
2873 rs6000_vector_mem[TImode] = VECTOR_VSX;
2874 rs6000_vector_align[TImode] = align64;
2875 }
2876
2877 /* Register class constraints for the constraints that depend on compile
2878 switches. When the VSX code was added, different constraints were added
2879 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2880 of the VSX registers are used. The register classes for scalar floating
2881 point types is set, based on whether we allow that type into the upper
2882 (Altivec) registers. GCC has register classes to target the Altivec
2883 registers for load/store operations, to select using a VSX memory
2884 operation instead of the traditional floating point operation. The
2885 constraints are:
2886
2887 d - Register class to use with traditional DFmode instructions.
2888 f - Register class to use with traditional SFmode instructions.
2889 v - Altivec register.
2890 wa - Any VSX register.
2891 wc - Reserved to represent individual CR bits (used in LLVM).
2892 wn - always NO_REGS.
2893 wr - GPR if 64-bit mode is permitted.
2894 wx - Float register if we can do 32-bit int stores. */
2895
2896 if (TARGET_HARD_FLOAT)
2897 {
2898 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2899 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2900 }
2901
2902 if (TARGET_VSX)
2903 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2904
2905 /* Add conditional constraints based on various options, to allow us to
2906 collapse multiple insn patterns. */
2907 if (TARGET_ALTIVEC)
2908 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2909
2910 if (TARGET_POWERPC64)
2911 {
2912 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2913 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2914 }
2915
2916 if (TARGET_STFIWX)
2917 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2918
2919 /* Support for new direct moves (ISA 3.0 + 64-bit). */
2920 if (TARGET_DIRECT_MOVE_128)
2921 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2922
2923 /* Set up the reload helper and direct move functions. */
2924 if (TARGET_VSX || TARGET_ALTIVEC)
2925 {
2926 if (TARGET_64BIT)
2927 {
2928 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2929 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2930 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2931 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2932 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2933 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2934 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2935 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2936 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2937 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2938 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2939 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2940 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2941 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2942 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2943 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2944 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2945 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2946 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2947 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2948
2949 if (FLOAT128_VECTOR_P (KFmode))
2950 {
2951 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
2952 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
2953 }
2954
2955 if (FLOAT128_VECTOR_P (TFmode))
2956 {
2957 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
2958 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
2959 }
2960
2961 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2962 available. */
2963 if (TARGET_NO_SDMODE_STACK)
2964 {
2965 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2966 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2967 }
2968
2969 if (TARGET_VSX)
2970 {
2971 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2972 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2973 }
2974
2975 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
2976 {
2977 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2978 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2979 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2980 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2981 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2982 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2983 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2984 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2985 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2986
2987 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2988 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2989 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2990 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2991 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2992 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2993 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2994 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2995 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2996
2997 if (FLOAT128_VECTOR_P (KFmode))
2998 {
2999 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3000 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3001 }
3002
3003 if (FLOAT128_VECTOR_P (TFmode))
3004 {
3005 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3006 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3007 }
3008 }
3009 }
3010 else
3011 {
3012 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3013 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3014 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3015 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3016 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3017 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3018 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3019 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3020 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3021 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3022 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3023 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3024 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3025 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3026 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3027 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3028 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3029 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3030 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3031 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3032
3033 if (FLOAT128_VECTOR_P (KFmode))
3034 {
3035 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3036 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3037 }
3038
3039 if (FLOAT128_IEEE_P (TFmode))
3040 {
3041 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3042 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3043 }
3044
3045 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3046 available. */
3047 if (TARGET_NO_SDMODE_STACK)
3048 {
3049 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3050 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3051 }
3052
3053 if (TARGET_VSX)
3054 {
3055 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3056 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3057 }
3058
3059 if (TARGET_DIRECT_MOVE)
3060 {
3061 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3062 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3063 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3064 }
3065 }
3066
3067 reg_addr[DFmode].scalar_in_vmx_p = true;
3068 reg_addr[DImode].scalar_in_vmx_p = true;
3069
3070 if (TARGET_P8_VECTOR)
3071 {
3072 reg_addr[SFmode].scalar_in_vmx_p = true;
3073 reg_addr[SImode].scalar_in_vmx_p = true;
3074
3075 if (TARGET_P9_VECTOR)
3076 {
3077 reg_addr[HImode].scalar_in_vmx_p = true;
3078 reg_addr[QImode].scalar_in_vmx_p = true;
3079 }
3080 }
3081 }
3082
3083 /* Precalculate HARD_REGNO_NREGS. */
3084 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3085 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3086 rs6000_hard_regno_nregs[m][r]
3087 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3088
3089 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3090 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3091 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3092 rs6000_hard_regno_mode_ok_p[m][r]
3093 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3094
3095 /* Precalculate CLASS_MAX_NREGS sizes. */
3096 for (c = 0; c < LIM_REG_CLASSES; ++c)
3097 {
3098 int reg_size;
3099
3100 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3101 reg_size = UNITS_PER_VSX_WORD;
3102
3103 else if (c == ALTIVEC_REGS)
3104 reg_size = UNITS_PER_ALTIVEC_WORD;
3105
3106 else if (c == FLOAT_REGS)
3107 reg_size = UNITS_PER_FP_WORD;
3108
3109 else
3110 reg_size = UNITS_PER_WORD;
3111
3112 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3113 {
3114 machine_mode m2 = (machine_mode)m;
3115 int reg_size2 = reg_size;
3116
3117 /* TDmode and IBM 128-bit floating point always take 2 registers, even
3118 in VSX. */
3119 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3120 reg_size2 = UNITS_PER_FP_WORD;
3121
3122 rs6000_class_max_nregs[m][c]
3123 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3124 }
3125 }
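
/* For example, FLOAT_REGS with TARGET_VSX uses reg_size == UNITS_PER_VSX_WORD
   (16), but IFmode (IBM 128-bit, 16 bytes) is FLOAT128_2REG_P, so reg_size2
   drops back to UNITS_PER_FP_WORD (8) and the entry becomes
   (16 + 8 - 1) / 8 = 2 registers.  */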
3126
3127 /* Calculate for which modes to automatically generate code that uses the
3128 reciprocal divide and square root instructions. In the future, possibly
3129 automatically generate the instructions even if the user did not specify
3130 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3131 not accurate enough. */
3132 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3133 if (TARGET_FRES)
3134 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3135 if (TARGET_FRE)
3136 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3137 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3138 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3139 if (VECTOR_UNIT_VSX_P (V2DFmode))
3140 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3141
3142 if (TARGET_FRSQRTES)
3143 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3144 if (TARGET_FRSQRTE)
3145 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3146 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3147 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3148 if (VECTOR_UNIT_VSX_P (V2DFmode))
3149 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3150
3151 if (rs6000_recip_control)
3152 {
3153 if (!flag_finite_math_only)
3154 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3155 "-ffast-math");
3156 if (flag_trapping_math)
3157 warning (0, "%qs requires %qs or %qs", "-mrecip",
3158 "-fno-trapping-math", "-ffast-math");
3159 if (!flag_reciprocal_math)
3160 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3161 "-ffast-math");
3162 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3163 {
3164 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3165 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3166 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3167
3168 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3169 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3170 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3171
3172 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3173 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3174 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3175
3176 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3177 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3178 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3179
3180 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3181 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3182 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3183
3184 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3185 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3186 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3187
3188 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3189 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3190 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3191
3192 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3193 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3194 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3195 }
3196 }
3197
3198 /* Update the addr mask bits in reg_addr to help secondary reload and the
3199 legitimate address support figure out the appropriate addressing to
3200 use. */
3201 rs6000_setup_reg_addr_masks ();
3202
3203 if (global_init_p || TARGET_DEBUG_TARGET)
3204 {
3205 if (TARGET_DEBUG_REG)
3206 rs6000_debug_reg_global ();
3207
3208 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3209 fprintf (stderr,
3210 "SImode variable mult cost = %d\n"
3211 "SImode constant mult cost = %d\n"
3212 "SImode short constant mult cost = %d\n"
3213 "DImode multipliciation cost = %d\n"
3214 "SImode division cost = %d\n"
3215 "DImode division cost = %d\n"
3216 "Simple fp operation cost = %d\n"
3217 "DFmode multiplication cost = %d\n"
3218 "SFmode division cost = %d\n"
3219 "DFmode division cost = %d\n"
3220 "cache line size = %d\n"
3221 "l1 cache size = %d\n"
3222 "l2 cache size = %d\n"
3223 "simultaneous prefetches = %d\n"
3224 "\n",
3225 rs6000_cost->mulsi,
3226 rs6000_cost->mulsi_const,
3227 rs6000_cost->mulsi_const9,
3228 rs6000_cost->muldi,
3229 rs6000_cost->divsi,
3230 rs6000_cost->divdi,
3231 rs6000_cost->fp,
3232 rs6000_cost->dmul,
3233 rs6000_cost->sdiv,
3234 rs6000_cost->ddiv,
3235 rs6000_cost->cache_line_size,
3236 rs6000_cost->l1_cache_size,
3237 rs6000_cost->l2_cache_size,
3238 rs6000_cost->simultaneous_prefetches);
3239 }
3240 }
3241
3242 #if TARGET_MACHO
3243 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3244
3245 static void
3246 darwin_rs6000_override_options (void)
3247 {
3248 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3249 off. */
3250 rs6000_altivec_abi = 1;
3251 TARGET_ALTIVEC_VRSAVE = 1;
3252 rs6000_current_abi = ABI_DARWIN;
3253
3254 if (DEFAULT_ABI == ABI_DARWIN
3255 && TARGET_64BIT)
3256 darwin_one_byte_bool = 1;
3257
3258 if (TARGET_64BIT && ! TARGET_POWERPC64)
3259 {
3260 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3261 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3262 }
3263
3264 /* The linkers [ld64] that support 64-bit code do not need the JBSR longcall
3265 optimisation, and will not work with the most generic case (where the
3266 symbol is undefined external, but there is no symbol stub). */
3267 if (TARGET_64BIT)
3268 rs6000_default_long_calls = 0;
3269
3270 /* ld_classic is (so far) still used for kernel (static) code, and supports
3271 the JBSR longcall / branch islands. */
3272 if (flag_mkernel)
3273 {
3274 rs6000_default_long_calls = 1;
3275
3276 /* Allow a kext author to do -mkernel -mhard-float. */
3277 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3278 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3279 }
3280
3281 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3282 Altivec. */
3283 if (!flag_mkernel && !flag_apple_kext
3284 && TARGET_64BIT
3285 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3286 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3287
3288 /* Unless the user (not the configurer) has explicitly overridden
3289 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3290 G4 unless targeting the kernel. */
3291 if (!flag_mkernel
3292 && !flag_apple_kext
3293 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3294 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3295 && ! global_options_set.x_rs6000_cpu_index)
3296 {
3297 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3298 }
3299 }
3300 #endif
3301
3302 /* If not otherwise specified by a target, make 'long double' equivalent to
3303 'double'. */
3304
3305 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3306 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3307 #endif
3308
3309 /* Return the builtin mask of the various options that could affect which
3310 builtins are enabled.  In the past we used target_flags, but we've run out of
3311 bits, and some options are no longer in target_flags. */
3312
3313 HOST_WIDE_INT
3314 rs6000_builtin_mask_calculate (void)
3315 {
3316 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3317 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3318 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3319 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3320 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3321 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3322 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3323 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3324 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3325 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3326 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3327 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3328 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3329 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3330 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3331 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3332 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3333 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3334 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3335 | ((TARGET_LONG_DOUBLE_128
3336 && TARGET_HARD_FLOAT
3337 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3338 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3339 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3340 }
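
/* A minimal sketch (not part of GCC) of how the mask computed above is
   typically consumed: builtin expansion compares the RS6000_BTM_* bits a
   builtin requires against the current mask.  The helper below is
   hypothetical; only the mask bits come from this file.

     static bool
     builtin_is_enabled_p (HOST_WIDE_INT required_mask)
     {
       HOST_WIDE_INT enabled = rs6000_builtin_mask_calculate ();
       return (required_mask & ~enabled) == 0;
     }

   A builtin tagged RS6000_BTM_ALTIVEC | RS6000_BTM_VSX, for instance, is
   usable only when both -maltivec and -mvsx are in effect.  */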
3341
3342 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3343 to clobber the XER[CA] bit because clobbering that bit without telling
3344 the compiler worked just fine with versions of GCC before GCC 5, and
3345 breaking a lot of older code in ways that are hard to track down is
3346 not such a great idea. */
3347
3348 static rtx_insn *
3349 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3350 vec<const char *> &/*constraints*/,
3351 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3352 {
3353 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3354 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3355 return NULL;
3356 }
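
/* An illustration (a sketch, not from GCC) of the older code the hook
   above keeps working: addic sets XER[CA] and addze reads it, yet the
   asm statement declares no CA clobber.  Clobbering CA for every asm
   statement makes such code safe.

     long
     increment_with_carry (long a)
     {
       long r;
       __asm__ ("addic %0,%1,1\n\taddze %0,%0" : "=r" (r) : "r" (a));
       return r;
     }
*/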
3357
3358 /* Override command line options.
3359
3360 Combine build-specific configuration information with options
3361 specified on the command line to set various state variables which
3362 influence code generation, optimization, and expansion of built-in
3363 functions. Assure that command-line configuration preferences are
3364 compatible with each other and with the build configuration; issue
3365 warnings while adjusting configuration or error messages while
3366 rejecting configuration.
3367
3368 Upon entry to this function:
3369
3370 This function is called once at the beginning of
3371 compilation, and then again at the start and end of compiling
3372 each section of code that has a different configuration, as
3373 indicated, for example, by adding the
3374
3375 __attribute__((__target__("cpu=power9")))
3376
3377 qualifier to a function definition or, for example, by bracketing
3378 code between
3379
3380 #pragma GCC target("altivec")
3381
3382 and
3383
3384 #pragma GCC reset_options
3385
3386 directives. Parameter global_init_p is true for the initial
3387 invocation, which initializes global variables, and false for all
3388 subsequent invocations.
3389
3390
3391 Various global state information is assumed to be valid. This
3392 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3393 default CPU specified at build configure time, TARGET_DEFAULT,
3394 representing the default set of option flags for the default
3395 target, and global_options_set.x_rs6000_isa_flags, representing
3396 which options were requested on the command line.
3397
3398 Upon return from this function:
3399
3400 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3401 was set by name on the command line. Additionally, if certain
3402 attributes are automatically enabled or disabled by this function
3403 in order to assure compatibility between options and
3404 configuration, the flags associated with those attributes are
3405 also set. By setting these "explicit bits", we avoid the risk
3406 that other code might accidentally overwrite these particular
3407 attributes with "default values".
3408
3409 The various bits of rs6000_isa_flags are set to indicate the
3410 target options that have been selected for the most current
3411 compilation efforts. This has the effect of also turning on the
3412 associated TARGET_XXX values since these are macros which are
3413 generally defined to test the corresponding bit of the
3414 rs6000_isa_flags variable.
3415
3416 The variable rs6000_builtin_mask is set to represent the target
3417 options for the most current compilation efforts, consistent with
3418 the current contents of rs6000_isa_flags. This variable controls
3419 expansion of built-in functions.
3420
3421 Various other global variables and fields of global structures
3422 (over 50 in all) are initialized to reflect the desired options
3423 for the most current compilation efforts. */
3424
3425 static bool
3426 rs6000_option_override_internal (bool global_init_p)
3427 {
3428 bool ret = true;
3429
3430 HOST_WIDE_INT set_masks;
3431 HOST_WIDE_INT ignore_masks;
3432 int cpu_index = -1;
3433 int tune_index;
3434 struct cl_target_option *main_target_opt
3435 = ((global_init_p || target_option_default_node == NULL)
3436 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3437
3438 /* Print defaults. */
3439 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3440 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3441
3442 /* Remember the explicit arguments. */
3443 if (global_init_p)
3444 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3445
3446 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3447 library functions, so warn about it. The flag may be useful for
3448 performance studies from time to time though, so don't disable it
3449 entirely. */
3450 if (global_options_set.x_rs6000_alignment_flags
3451 && rs6000_alignment_flags == MASK_ALIGN_POWER
3452 && DEFAULT_ABI == ABI_DARWIN
3453 && TARGET_64BIT)
3454 warning (0, "%qs is not supported for 64-bit Darwin;"
3455 " it is incompatible with the installed C and C++ libraries",
3456 "-malign-power");
3457
3458 /* Numerous experiments show that IRA-based loop pressure
3459 calculation works better for RTL loop invariant motion on targets
3460 with enough (>= 32) registers.  It is an expensive optimization,
3461 so it is enabled only when optimizing for peak performance.  */
3462 if (optimize >= 3 && global_init_p
3463 && !global_options_set.x_flag_ira_loop_pressure)
3464 flag_ira_loop_pressure = 1;
3465
3466 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3467 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3468 options were already specified. */
3469 if (flag_sanitize & SANITIZE_USER_ADDRESS
3470 && !global_options_set.x_flag_asynchronous_unwind_tables)
3471 flag_asynchronous_unwind_tables = 1;
3472
3473 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3474 loop unroller is active. It is only checked during unrolling, so
3475 we can just set it on by default. */
3476 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3477 flag_variable_expansion_in_unroller = 1;
3478
3479 /* Set the pointer size. */
3480 if (TARGET_64BIT)
3481 {
3482 rs6000_pmode = DImode;
3483 rs6000_pointer_size = 64;
3484 }
3485 else
3486 {
3487 rs6000_pmode = SImode;
3488 rs6000_pointer_size = 32;
3489 }
3490
3491 /* Some OSs don't support saving the high part of 64-bit registers on context
3492 switch. Other OSs don't support saving Altivec registers. On those OSs,
3493 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3494 if the user wants either, the user must explicitly specify them and we
3495 won't interfere with the user's specification. */
3496
3497 set_masks = POWERPC_MASKS;
3498 #ifdef OS_MISSING_POWERPC64
3499 if (OS_MISSING_POWERPC64)
3500 set_masks &= ~OPTION_MASK_POWERPC64;
3501 #endif
3502 #ifdef OS_MISSING_ALTIVEC
3503 if (OS_MISSING_ALTIVEC)
3504 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3505 | OTHER_VSX_VECTOR_MASKS);
3506 #endif
3507
3508 /* Don't override by the processor default if given explicitly. */
3509 set_masks &= ~rs6000_isa_flags_explicit;
3510
3511 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed
3512 the cpu in a target attribute or pragma, but did not specify a tuning
3513 option, use the cpu for the tuning option rather than the option specified
3514 with -mtune on the command line. Process a '--with-cpu' configuration
3515 request as an implicit -mcpu.  */
3516 if (rs6000_cpu_index >= 0)
3517 cpu_index = rs6000_cpu_index;
3518 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3519 cpu_index = main_target_opt->x_rs6000_cpu_index;
3520 else if (OPTION_TARGET_CPU_DEFAULT)
3521 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3522
3523 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3524 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3525 with those from the cpu, except for options that were explicitly set. If
3526 we don't have a cpu, do not override the target bits set in
3527 TARGET_DEFAULT. */
3528 if (cpu_index >= 0)
3529 {
3530 rs6000_cpu_index = cpu_index;
3531 rs6000_isa_flags &= ~set_masks;
3532 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3533 & set_masks);
3534 }
3535 else
3536 {
3537 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3538 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3539 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3540 to using rs6000_isa_flags, we need to do the initialization here.
3541
3542 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3543 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3544 HOST_WIDE_INT flags;
3545 if (TARGET_DEFAULT)
3546 flags = TARGET_DEFAULT;
3547 else
3548 {
3549 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3550 const char *default_cpu = (!TARGET_POWERPC64
3551 ? "powerpc"
3552 : (BYTES_BIG_ENDIAN
3553 ? "powerpc64"
3554 : "powerpc64le"));
3555 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3556 flags = processor_target_table[default_cpu_index].target_enable;
3557 }
3558 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3559 }
3560
3561 if (rs6000_tune_index >= 0)
3562 tune_index = rs6000_tune_index;
3563 else if (cpu_index >= 0)
3564 rs6000_tune_index = tune_index = cpu_index;
3565 else
3566 {
3567 size_t i;
3568 enum processor_type tune_proc
3569 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3570
3571 tune_index = -1;
3572 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3573 if (processor_target_table[i].processor == tune_proc)
3574 {
3575 tune_index = i;
3576 break;
3577 }
3578 }
3579
3580 if (cpu_index >= 0)
3581 rs6000_cpu = processor_target_table[cpu_index].processor;
3582 else
3583 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3584
3585 gcc_assert (tune_index >= 0);
3586 rs6000_tune = processor_target_table[tune_index].processor;
3587
3588 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3589 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3590 || rs6000_cpu == PROCESSOR_PPCE5500)
3591 {
3592 if (TARGET_ALTIVEC)
3593 error ("AltiVec not supported on this target");
3594 }
3595
3596 /* If we are optimizing big endian systems for space, use the load/store
3597 multiple instructions. */
3598 if (BYTES_BIG_ENDIAN && optimize_size)
3599 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
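
/* The idiom above, "flags |= ~explicit & MASK", recurs throughout this
   function: it turns an option on only if the user did not set it by
   hand.  A worked example with illustrative values:

     explicit = 0b0100  (user passed -mno-multiple; bit 2 is explicit)
     MASK     = 0b0100  (OPTION_MASK_MULTIPLE)
     ~explicit & MASK = 0b0000, so the default does not override the user.

   With explicit = 0 the expression yields MASK and the bit is set.  */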
3600
3601 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3602 because the hardware doesn't support the instructions used in little
3603 endian mode, and they cause an alignment trap.  The 750 does not cause an
3604 alignment trap (except when the target address is unaligned).  */
3605
3606 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3607 {
3608 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3609 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3610 warning (0, "%qs is not supported on little endian systems",
3611 "-mmultiple");
3612 }
3613
3614 /* If little-endian, default to -mstrict-align on older processors.
3615 Testing for htm matches power8 and later. */
3616 if (!BYTES_BIG_ENDIAN
3617 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3618 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3619
3620 if (!rs6000_fold_gimple)
3621 fprintf (stderr,
3622 "gimple folding of rs6000 builtins has been disabled.\n");
3623
3624 /* Add some warnings for VSX. */
3625 if (TARGET_VSX)
3626 {
3627 const char *msg = NULL;
3628 if (!TARGET_HARD_FLOAT)
3629 {
3630 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3631 msg = N_("%<-mvsx%> requires hardware floating point");
3632 else
3633 {
3634 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3635 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3636 }
3637 }
3638 else if (TARGET_AVOID_XFORM > 0)
3639 msg = N_("%<-mvsx%> needs indexed addressing");
3640 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3641 & OPTION_MASK_ALTIVEC))
3642 {
3643 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3644 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3645 else
3646 msg = N_("%<-mno-altivec%> disables vsx");
3647 }
3648
3649 if (msg)
3650 {
3651 warning (0, msg);
3652 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3653 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3654 }
3655 }
3656
3657 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3658 the -mcpu setting to enable options that conflict. */
3659 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3660 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3661 | OPTION_MASK_ALTIVEC
3662 | OPTION_MASK_VSX)) != 0)
3663 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3664 | OPTION_MASK_DIRECT_MOVE)
3665 & ~rs6000_isa_flags_explicit);
3666
3667 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3668 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3669
3670 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3671 off all of the options that depend on those flags. */
3672 ignore_masks = rs6000_disable_incompatible_switches ();
3673
3674 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3675 unless the user explicitly used the -mno-<option> to disable the code. */
3676 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3677 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3678 else if (TARGET_P9_MINMAX)
3679 {
3680 if (cpu_index >= 0)
3681 {
3682 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3683 {
3684 /* Legacy behavior: allow -mcpu=power9 with certain
3685 capabilities explicitly disabled.  */
3686 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3687 }
3688 else
3689 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3690 "for <xxx> less than power9", "-mcpu");
3691 }
3692 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3693 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3694 & rs6000_isa_flags_explicit))
3695 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3696 were explicitly cleared. */
3697 error ("%qs incompatible with explicitly disabled options",
3698 "-mpower9-minmax");
3699 else
3700 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3701 }
3702 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3703 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3704 else if (TARGET_VSX)
3705 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3706 else if (TARGET_POPCNTD)
3707 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3708 else if (TARGET_DFP)
3709 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3710 else if (TARGET_CMPB)
3711 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3712 else if (TARGET_FPRND)
3713 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3714 else if (TARGET_POPCNTB)
3715 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3716 else if (TARGET_ALTIVEC)
3717 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3718
3719 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3720 {
3721 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3722 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3723 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3724 }
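
/* The block above is the first of several that share one shape: if the
   dependent option was requested by name, a conflict is a hard error;
   if it was merely implied (e.g. by -mcpu), it is dropped silently.
   Illustrative command lines (behavior per the code above):

     gcc -mcrypto -mno-altivec foo.c      error: -mcrypto requires -maltivec
     gcc -mcpu=power8 -mno-altivec foo.c  crypto is silently disabled  */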
3725
3726 if (!TARGET_FPRND && TARGET_VSX)
3727 {
3728 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3729 /* TARGET_VSX = 1 implies Power7 and newer.  */
3730 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3731 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3732 }
3733
3734 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3735 {
3736 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3737 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3738 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3739 }
3740
3741 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3742 {
3743 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3744 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3745 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3746 }
3747
3748 if (TARGET_P8_VECTOR && !TARGET_VSX)
3749 {
3750 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3751 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3752 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3753 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3754 {
3755 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3756 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3757 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3758 }
3759 else
3760 {
3761 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3762 not explicit. */
3763 rs6000_isa_flags |= OPTION_MASK_VSX;
3764 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3765 }
3766 }
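
/* A summary of the three-way resolution above for -mpower8-vector versus
   -mvsx, with illustrative command lines:

     -mpower8-vector -mno-vsx      error, both options were explicit
     -mno-vsx (p8-vector implied)  p8-vector is quietly dropped
     -mpower8-vector (vsx unset)   vsx is enabled as a prerequisite  */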
3767
3768 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3769 {
3770 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3771 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3772 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3773 }
3774
3775 /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
3776 silently turn off quad memory mode. */
3777 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3778 {
3779 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3780 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3781
3782 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3783 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3784
3785 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3786 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3787 }
3788
3789 /* Non-atomic quad memory load/store are disabled for little endian, since
3790 the words are reversed, but atomic operations can still be done by
3791 swapping the words. */
3792 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3793 {
3794 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3795 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3796 "mode"));
3797
3798 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3799 }
3800
3801 /* Assume if the user asked for normal quad memory instructions, they want
3802 the atomic versions as well, unless they explicitly told us not to use quad
3803 word atomic instructions. */
3804 if (TARGET_QUAD_MEMORY
3805 && !TARGET_QUAD_MEMORY_ATOMIC
3806 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3807 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3808
3809 /* If we can shrink-wrap the TOC register save separately, then use
3810 -msave-toc-indirect unless explicitly disabled. */
3811 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3812 && flag_shrink_wrap_separate
3813 && optimize_function_for_speed_p (cfun))
3814 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3815
3816 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3817 generating power8 instructions. Power9 does not optimize power8 fusion
3818 cases. */
3819 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3820 {
3821 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3822 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3823 else
3824 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3825 }
3826
3827 /* Setting additional fusion flags turns on base fusion. */
3828 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3829 {
3830 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3831 {
3832 if (TARGET_P8_FUSION_SIGN)
3833 error ("%qs requires %qs", "-mpower8-fusion-sign",
3834 "-mpower8-fusion");
3835
3836 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3837 }
3838 else
3839 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3840 }
3841
3842 /* Power8 does not fuse sign extended loads with the addis. If we are
3843 optimizing at high levels for speed, convert a sign extended load into a
3844 zero extending load, and an explicit sign extension. */
3845 if (TARGET_P8_FUSION
3846 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3847 && optimize_function_for_speed_p (cfun)
3848 && optimize >= 3)
3849 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3850
3851 /* ISA 3.0 vector instructions include ISA 2.07. */
3852 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3853 {
3854 /* We prefer to not mention undocumented options in
3855 error messages. However, if users have managed to select
3856 power9-vector without selecting power8-vector, they
3857 already know about undocumented flags. */
3858 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
3859 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
3860 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3861 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
3862 {
3863 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
3864 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3865 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
3866 }
3867 else
3868 {
3869 /* OPTION_MASK_P9_VECTOR is explicit and
3870 OPTION_MASK_P8_VECTOR is not explicit. */
3871 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
3872 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3873 }
3874 }
3875
3876 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
3877 support.  If we only have ISA 2.06 support, and the user did not specify
3878 the switch, leave it set to -1 so the movmisalign patterns are enabled,
3879 but we don't enable the full vectorization support.  */
3880 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
3881 TARGET_ALLOW_MOVMISALIGN = 1;
3882
3883 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
3884 {
3885 if (TARGET_ALLOW_MOVMISALIGN > 0
3886 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
3887 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
3888
3889 TARGET_ALLOW_MOVMISALIGN = 0;
3890 }
3891
3892 /* Determine when unaligned vector accesses are permitted, and when
3893 they are preferred over masked Altivec loads. Note that if
3894 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3895 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3896 not true. */
3897 if (TARGET_EFFICIENT_UNALIGNED_VSX)
3898 {
3899 if (!TARGET_VSX)
3900 {
3901 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3902 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
3903
3904 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3905 }
3906
3907 else if (!TARGET_ALLOW_MOVMISALIGN)
3908 {
3909 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3910 error ("%qs requires %qs", "-mefficient-unaligned-vsx",
3911 "-mallow-movmisalign");
3912
3913 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3914 }
3915 }
3916
3917 /* Use long double size to select the appropriate long double. We use
3918 TYPE_PRECISION to differentiate the 3 different long double types. We map
3919 128 into the precision used for TFmode. */
3920 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
3921 ? 64
3922 : FLOAT_PRECISION_TFmode);
3923
3924 /* Set long double size before the IEEE 128-bit tests. */
3925 if (!global_options_set.x_rs6000_long_double_type_size)
3926 {
3927 if (main_target_opt != NULL
3928 && (main_target_opt->x_rs6000_long_double_type_size
3929 != default_long_double_size))
3930 error ("target attribute or pragma changes %<long double%> size");
3931 else
3932 rs6000_long_double_type_size = default_long_double_size;
3933 }
3934 else if (rs6000_long_double_type_size == 128)
3935 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
3936 else if (global_options_set.x_rs6000_ieeequad)
3937 {
3938 if (global_options.x_rs6000_ieeequad)
3939 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
3940 else
3941 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
3942 }
3943
3944 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
3945 systems will also set long double to be IEEE 128-bit. AIX and Darwin
3946 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
3947 those systems will not pick up this default. Warn if the user changes the
3948 default unless -Wno-psabi. */
3949 if (!global_options_set.x_rs6000_ieeequad)
3950 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
3951
3952 else
3953 {
3954 if (global_options.x_rs6000_ieeequad
3955 && (!TARGET_POPCNTD || !TARGET_VSX))
3956 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
3957
3958 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
3959 {
3960 static bool warned_change_long_double;
3961 if (!warned_change_long_double)
3962 {
3963 warned_change_long_double = true;
3964 if (TARGET_IEEEQUAD)
3965 warning (OPT_Wpsabi, "Using IEEE extended precision "
3966 "%<long double%>");
3967 else
3968 warning (OPT_Wpsabi, "Using IBM extended precision "
3969 "%<long double%>");
3970 }
3971 }
3972 }
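
/* How the settings above combine, illustratively:

     -mlong-double-64                        long double is 64-bit double
     -mlong-double-128 -mabi=ibmlongdouble   IBM extended double-double
     -mlong-double-128 -mabi=ieeelongdouble  IEEE binary128 (needs the
                                             ISA 2.06 support checked above)

   Mapping 128 to FLOAT_PRECISION_TFmode keeps the two 128-bit formats
   distinguishable by TYPE_PRECISION even though both are 128 bits wide.  */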
3973
3974 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
3975 systems.  In GCC 7, we would enable the IEEE 128-bit floating point
3976 infrastructure (-mfloat128-type) but not enable the actual __float128 type
3977 unless the user used the explicit -mfloat128. In GCC 8, we enable both
3978 the keyword as well as the type. */
3979 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
3980
3981 /* IEEE 128-bit floating point requires VSX support. */
3982 if (TARGET_FLOAT128_KEYWORD)
3983 {
3984 if (!TARGET_VSX)
3985 {
3986 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
3987 error ("%qs requires VSX support", "-mfloat128");
3988
3989 TARGET_FLOAT128_TYPE = 0;
3990 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
3991 | OPTION_MASK_FLOAT128_HW);
3992 }
3993 else if (!TARGET_FLOAT128_TYPE)
3994 {
3995 TARGET_FLOAT128_TYPE = 1;
3996 warning (0, "the %<-mfloat128%> option may not be fully supported");
3997 }
3998 }
3999
4000 /* Enable the __float128 keyword under Linux by default. */
4001 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4002 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4003 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4004
4005 /* If we are supporting the float128 type and have full ISA 3.0 support,
4006 enable -mfloat128-hardware by default. However, don't enable the
4007 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4008 because sometimes the compiler wants to put things in an integer
4009 container, and if we don't have __int128 support, it is impossible. */
4010 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4011 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4012 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4013 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4014
4015 if (TARGET_FLOAT128_HW
4016 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4017 {
4018 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4019 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4020
4021 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4022 }
4023
4024 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4025 {
4026 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4027 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4028
4029 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4030 }
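
/* With the flags resolved above, user code can use the type directly; a
   minimal example (not part of GCC), assuming -mfloat128 is in effect:

     __float128
     scale (__float128 x)
     {
       return x * 2.0q;
     }

   With -mfloat128-hardware (full ISA 3.0 and -m64) the multiply maps to
   the xsmulqp instruction; otherwise it is expanded through libgcc
   software emulation.  */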
4031
4032 /* Enable -mprefixed by default on 'future' systems. */
4033 if (TARGET_FUTURE && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4034 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4035
4036 /* -mprefixed requires -mcpu=future. */
4037 else if (TARGET_PREFIXED && !TARGET_FUTURE)
4038 {
4039 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4040 error ("%qs requires %qs", "-mprefixed", "-mcpu=future");
4041
4042 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4043 }
4044
4045 /* -mpcrel requires prefixed load/store addressing. */
4046 if (TARGET_PCREL && !TARGET_PREFIXED)
4047 {
4048 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4049 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4050
4051 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4052 }
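
/* Net effect of the two blocks above, illustratively:

     -mcpu=future                  -mprefixed is enabled by default
     -mprefixed -mcpu=power9       error: -mprefixed requires -mcpu=future
     -mcpu=future -mno-prefixed    prefixed stays off, and -mpcrel is then
                                   rejected if explicit, dropped if not  */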
4053
4054 /* Print the options after updating the defaults. */
4055 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4056 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4057
4058 /* E500mc does "better" if we inline more aggressively. Respect the
4059 user's opinion, though. */
4060 if (rs6000_block_move_inline_limit == 0
4061 && (rs6000_tune == PROCESSOR_PPCE500MC
4062 || rs6000_tune == PROCESSOR_PPCE500MC64
4063 || rs6000_tune == PROCESSOR_PPCE5500
4064 || rs6000_tune == PROCESSOR_PPCE6500))
4065 rs6000_block_move_inline_limit = 128;
4066
4067 /* store_one_arg depends on expand_block_move to handle at least the
4068 size of reg_parm_stack_space. */
4069 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4070 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4071
4072 if (global_init_p)
4073 {
4074 /* If the appropriate debug option is enabled, replace the target hooks
4075 with debug versions that call the real version and then prints
4076 debugging information. */
4077 if (TARGET_DEBUG_COST)
4078 {
4079 targetm.rtx_costs = rs6000_debug_rtx_costs;
4080 targetm.address_cost = rs6000_debug_address_cost;
4081 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4082 }
4083
4084 if (TARGET_DEBUG_ADDR)
4085 {
4086 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4087 targetm.legitimize_address = rs6000_debug_legitimize_address;
4088 rs6000_secondary_reload_class_ptr
4089 = rs6000_debug_secondary_reload_class;
4090 targetm.secondary_memory_needed
4091 = rs6000_debug_secondary_memory_needed;
4092 targetm.can_change_mode_class
4093 = rs6000_debug_can_change_mode_class;
4094 rs6000_preferred_reload_class_ptr
4095 = rs6000_debug_preferred_reload_class;
4096 rs6000_mode_dependent_address_ptr
4097 = rs6000_debug_mode_dependent_address;
4098 }
4099
4100 if (rs6000_veclibabi_name)
4101 {
4102 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4103 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4104 else
4105 {
4106 error ("unknown vectorization library ABI type (%qs) for "
4107 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4108 ret = false;
4109 }
4110 }
4111 }
4112
4113 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4114 target attribute or pragma which automatically enables both options,
4115 unless the altivec ABI was set. This is set by default for 64-bit, but
4116 not for 32-bit. */
4117 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4118 {
4119 TARGET_FLOAT128_TYPE = 0;
4120 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4121 | OPTION_MASK_FLOAT128_KEYWORD)
4122 & ~rs6000_isa_flags_explicit);
4123 }
4124
4125 /* Enable Altivec ABI for AIX -maltivec. */
4126 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4127 {
4128 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4129 error ("target attribute or pragma changes AltiVec ABI");
4130 else
4131 rs6000_altivec_abi = 1;
4132 }
4133
4134 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4135 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4136 be explicitly overridden in either case. */
4137 if (TARGET_ELF)
4138 {
4139 if (!global_options_set.x_rs6000_altivec_abi
4140 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4141 {
4142 if (main_target_opt != NULL
4143 && !main_target_opt->x_rs6000_altivec_abi)
4144 error ("target attribute or pragma changes AltiVec ABI");
4145 else
4146 rs6000_altivec_abi = 1;
4147 }
4148 }
4149
4150 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4151 So far, the only darwin64 targets are also Mach-O.  */
4152 if (TARGET_MACHO
4153 && DEFAULT_ABI == ABI_DARWIN
4154 && TARGET_64BIT)
4155 {
4156 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4157 error ("target attribute or pragma changes darwin64 ABI");
4158 else
4159 {
4160 rs6000_darwin64_abi = 1;
4161 /* Default to natural alignment, for better performance. */
4162 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4163 }
4164 }
4165
4166 /* Place FP constants in the constant pool instead of TOC
4167 if section anchors enabled. */
4168 if (flag_section_anchors
4169 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4170 TARGET_NO_FP_IN_TOC = 1;
4171
4172 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4173 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4174
4175 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4176 SUBTARGET_OVERRIDE_OPTIONS;
4177 #endif
4178 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4179 SUBSUBTARGET_OVERRIDE_OPTIONS;
4180 #endif
4181 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4182 SUB3TARGET_OVERRIDE_OPTIONS;
4183 #endif
4184
4185 /* If the ABI has support for PC-relative relocations, enable it by default.
4186 This test depends on the sub-target tests above setting the code model to
4187 medium for ELF v2 systems. */
4188 if (PCREL_SUPPORTED_BY_OS
4189 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4190 rs6000_isa_flags |= OPTION_MASK_PCREL;
4191
4192 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4193 after the subtarget override options are done. */
4194 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4195 {
4196 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4197 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4198
4199 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4200 }
4201
4202 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4203 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4204
4205 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4206 && rs6000_tune != PROCESSOR_POWER5
4207 && rs6000_tune != PROCESSOR_POWER6
4208 && rs6000_tune != PROCESSOR_POWER7
4209 && rs6000_tune != PROCESSOR_POWER8
4210 && rs6000_tune != PROCESSOR_POWER9
4211 && rs6000_tune != PROCESSOR_FUTURE
4212 && rs6000_tune != PROCESSOR_PPCA2
4213 && rs6000_tune != PROCESSOR_CELL
4214 && rs6000_tune != PROCESSOR_PPC476);
4215 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4216 || rs6000_tune == PROCESSOR_POWER5
4217 || rs6000_tune == PROCESSOR_POWER7
4218 || rs6000_tune == PROCESSOR_POWER8);
4219 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4220 || rs6000_tune == PROCESSOR_POWER5
4221 || rs6000_tune == PROCESSOR_POWER6
4222 || rs6000_tune == PROCESSOR_POWER7
4223 || rs6000_tune == PROCESSOR_POWER8
4224 || rs6000_tune == PROCESSOR_POWER9
4225 || rs6000_tune == PROCESSOR_FUTURE
4226 || rs6000_tune == PROCESSOR_PPCE500MC
4227 || rs6000_tune == PROCESSOR_PPCE500MC64
4228 || rs6000_tune == PROCESSOR_PPCE5500
4229 || rs6000_tune == PROCESSOR_PPCE6500);
4230
4231 /* Allow debug switches to override the above settings. These are set to -1
4232 in rs6000.opt to indicate the user hasn't directly set the switch. */
4233 if (TARGET_ALWAYS_HINT >= 0)
4234 rs6000_always_hint = TARGET_ALWAYS_HINT;
4235
4236 if (TARGET_SCHED_GROUPS >= 0)
4237 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4238
4239 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4240 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4241
4242 rs6000_sched_restricted_insns_priority
4243 = (rs6000_sched_groups ? 1 : 0);
4244
4245 /* Handle -msched-costly-dep option. */
4246 rs6000_sched_costly_dep
4247 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4248
4249 if (rs6000_sched_costly_dep_str)
4250 {
4251 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4252 rs6000_sched_costly_dep = no_dep_costly;
4253 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4254 rs6000_sched_costly_dep = all_deps_costly;
4255 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4256 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4257 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4258 rs6000_sched_costly_dep = store_to_load_dep_costly;
4259 else
4260 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4261 atoi (rs6000_sched_costly_dep_str));
4262 }
4263
4264 /* Handle -minsert-sched-nops option. */
4265 rs6000_sched_insert_nops
4266 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4267
4268 if (rs6000_sched_insert_nops_str)
4269 {
4270 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4271 rs6000_sched_insert_nops = sched_finish_none;
4272 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4273 rs6000_sched_insert_nops = sched_finish_pad_groups;
4274 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4275 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4276 else
4277 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4278 atoi (rs6000_sched_insert_nops_str));
4279 }
4280
4281 /* Handle stack protector */
4282 if (!global_options_set.x_rs6000_stack_protector_guard)
4283 #ifdef TARGET_THREAD_SSP_OFFSET
4284 rs6000_stack_protector_guard = SSP_TLS;
4285 #else
4286 rs6000_stack_protector_guard = SSP_GLOBAL;
4287 #endif
4288
4289 #ifdef TARGET_THREAD_SSP_OFFSET
4290 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4291 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4292 #endif
4293
4294 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4295 {
4296 char *endp;
4297 const char *str = rs6000_stack_protector_guard_offset_str;
4298
4299 errno = 0;
4300 long offset = strtol (str, &endp, 0);
4301 if (!*str || *endp || errno)
4302 error ("%qs is not a valid number in %qs", str,
4303 "-mstack-protector-guard-offset=");
4304
4305 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4306 || (TARGET_64BIT && (offset & 3)))
4307 error ("%qs is not a valid offset in %qs", str,
4308 "-mstack-protector-guard-offset=");
4309
4310 rs6000_stack_protector_guard_offset = offset;
4311 }
4312
4313 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4314 {
4315 const char *str = rs6000_stack_protector_guard_reg_str;
4316 int reg = decode_reg_name (str);
4317
4318 if (!IN_RANGE (reg, 1, 31))
4319 error ("%qs is not a valid base register in %qs", str,
4320 "-mstack-protector-guard-reg=");
4321
4322 rs6000_stack_protector_guard_reg = reg;
4323 }
4324
4325 if (rs6000_stack_protector_guard == SSP_TLS
4326 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4327 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
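
/* An example of the TLS guard scheme configured above; the flag values
   are illustrative (the built-in default offset comes from
   TARGET_THREAD_SSP_OFFSET):

     gcc -mstack-protector-guard=tls -mstack-protector-guard-reg=r13 \
         -mstack-protector-guard-offset=-28688 foo.c

   The canary is then loaded relative to the thread pointer, e.g.
   "ld r0,-28688(r13)" in 64-bit mode.  */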
4328
4329 if (global_init_p)
4330 {
4331 #ifdef TARGET_REGNAMES
4332 /* If the user desires alternate register names, copy in the
4333 alternate names now. */
4334 if (TARGET_REGNAMES)
4335 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4336 #endif
4337
4338 /* Set aix_struct_return last, after the ABI is determined.
4339 If -maix-struct-return or -msvr4-struct-return was explicitly
4340 used, don't override with the ABI default. */
4341 if (!global_options_set.x_aix_struct_return)
4342 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4343
4344 #if 0
4345 /* IBM XL compiler defaults to unsigned bitfields. */
4346 if (TARGET_XL_COMPAT)
4347 flag_signed_bitfields = 0;
4348 #endif
4349
4350 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4351 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4352
4353 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4354
4355 /* We can only guarantee the availability of DI pseudo-ops when
4356 assembling for 64-bit targets. */
4357 if (!TARGET_64BIT)
4358 {
4359 targetm.asm_out.aligned_op.di = NULL;
4360 targetm.asm_out.unaligned_op.di = NULL;
4361 }
4362
4363
4364 /* Set branch target alignment, if not optimizing for size. */
4365 if (!optimize_size)
4366 {
4367 /* Cell wants to be aligned 8-byte for dual issue.  Titan wants to be
4368 aligned 8-byte to avoid misprediction by the branch predictor.  */
4369 if (rs6000_tune == PROCESSOR_TITAN
4370 || rs6000_tune == PROCESSOR_CELL)
4371 {
4372 if (flag_align_functions && !str_align_functions)
4373 str_align_functions = "8";
4374 if (flag_align_jumps && !str_align_jumps)
4375 str_align_jumps = "8";
4376 if (flag_align_loops && !str_align_loops)
4377 str_align_loops = "8";
4378 }
4379 if (rs6000_align_branch_targets)
4380 {
4381 if (flag_align_functions && !str_align_functions)
4382 str_align_functions = "16";
4383 if (flag_align_jumps && !str_align_jumps)
4384 str_align_jumps = "16";
4385 if (flag_align_loops && !str_align_loops)
4386 {
4387 can_override_loop_align = 1;
4388 str_align_loops = "16";
4389 }
4390 }
4391 }
4392
4393 /* Arrange to save and restore machine status around nested functions. */
4394 init_machine_status = rs6000_init_machine_status;
4395
4396 /* We should always be splitting complex arguments, but we can't break
4397 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4398 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4399 targetm.calls.split_complex_arg = NULL;
4400
4401 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4402 if (DEFAULT_ABI == ABI_AIX)
4403 targetm.calls.custom_function_descriptors = 0;
4404 }
4405
4406 /* Initialize rs6000_cost with the appropriate target costs. */
4407 if (optimize_size)
4408 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4409 else
4410 switch (rs6000_tune)
4411 {
4412 case PROCESSOR_RS64A:
4413 rs6000_cost = &rs64a_cost;
4414 break;
4415
4416 case PROCESSOR_MPCCORE:
4417 rs6000_cost = &mpccore_cost;
4418 break;
4419
4420 case PROCESSOR_PPC403:
4421 rs6000_cost = &ppc403_cost;
4422 break;
4423
4424 case PROCESSOR_PPC405:
4425 rs6000_cost = &ppc405_cost;
4426 break;
4427
4428 case PROCESSOR_PPC440:
4429 rs6000_cost = &ppc440_cost;
4430 break;
4431
4432 case PROCESSOR_PPC476:
4433 rs6000_cost = &ppc476_cost;
4434 break;
4435
4436 case PROCESSOR_PPC601:
4437 rs6000_cost = &ppc601_cost;
4438 break;
4439
4440 case PROCESSOR_PPC603:
4441 rs6000_cost = &ppc603_cost;
4442 break;
4443
4444 case PROCESSOR_PPC604:
4445 rs6000_cost = &ppc604_cost;
4446 break;
4447
4448 case PROCESSOR_PPC604e:
4449 rs6000_cost = &ppc604e_cost;
4450 break;
4451
4452 case PROCESSOR_PPC620:
4453 rs6000_cost = &ppc620_cost;
4454 break;
4455
4456 case PROCESSOR_PPC630:
4457 rs6000_cost = &ppc630_cost;
4458 break;
4459
4460 case PROCESSOR_CELL:
4461 rs6000_cost = &ppccell_cost;
4462 break;
4463
4464 case PROCESSOR_PPC750:
4465 case PROCESSOR_PPC7400:
4466 rs6000_cost = &ppc750_cost;
4467 break;
4468
4469 case PROCESSOR_PPC7450:
4470 rs6000_cost = &ppc7450_cost;
4471 break;
4472
4473 case PROCESSOR_PPC8540:
4474 case PROCESSOR_PPC8548:
4475 rs6000_cost = &ppc8540_cost;
4476 break;
4477
4478 case PROCESSOR_PPCE300C2:
4479 case PROCESSOR_PPCE300C3:
4480 rs6000_cost = &ppce300c2c3_cost;
4481 break;
4482
4483 case PROCESSOR_PPCE500MC:
4484 rs6000_cost = &ppce500mc_cost;
4485 break;
4486
4487 case PROCESSOR_PPCE500MC64:
4488 rs6000_cost = &ppce500mc64_cost;
4489 break;
4490
4491 case PROCESSOR_PPCE5500:
4492 rs6000_cost = &ppce5500_cost;
4493 break;
4494
4495 case PROCESSOR_PPCE6500:
4496 rs6000_cost = &ppce6500_cost;
4497 break;
4498
4499 case PROCESSOR_TITAN:
4500 rs6000_cost = &titan_cost;
4501 break;
4502
4503 case PROCESSOR_POWER4:
4504 case PROCESSOR_POWER5:
4505 rs6000_cost = &power4_cost;
4506 break;
4507
4508 case PROCESSOR_POWER6:
4509 rs6000_cost = &power6_cost;
4510 break;
4511
4512 case PROCESSOR_POWER7:
4513 rs6000_cost = &power7_cost;
4514 break;
4515
4516 case PROCESSOR_POWER8:
4517 rs6000_cost = &power8_cost;
4518 break;
4519
4520 case PROCESSOR_POWER9:
4521 case PROCESSOR_FUTURE:
4522 rs6000_cost = &power9_cost;
4523 break;
4524
4525 case PROCESSOR_PPCA2:
4526 rs6000_cost = &ppca2_cost;
4527 break;
4528
4529 default:
4530 gcc_unreachable ();
4531 }
4532
4533 if (global_init_p)
4534 {
4535 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4536 param_simultaneous_prefetches,
4537 rs6000_cost->simultaneous_prefetches);
4538 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4539 param_l1_cache_size,
4540 rs6000_cost->l1_cache_size);
4541 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4542 param_l1_cache_line_size,
4543 rs6000_cost->cache_line_size);
4544 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4545 param_l2_cache_size,
4546 rs6000_cost->l2_cache_size);
4547
4548 /* Increase loop peeling limits based on performance analysis. */
4549 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4550 param_max_peeled_insns, 400);
4551 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4552 param_max_completely_peeled_insns, 400);
4553
4554 /* Use the 'model' -fsched-pressure algorithm by default. */
4555 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4556 param_sched_pressure_algorithm,
4557 SCHED_PRESSURE_MODEL);
4558
4559 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
4560 turns -frename-registers on. */
4561 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
4562 || (global_options_set.x_flag_unroll_all_loops
4563 && flag_unroll_all_loops))
4564 {
4565 if (!global_options_set.x_unroll_only_small_loops)
4566 unroll_only_small_loops = 0;
4567 if (!global_options_set.x_flag_rename_registers)
4568 flag_rename_registers = 1;
4569 }
4570
4571 /* If using typedef char *va_list, signal that
4572 __builtin_va_start (&ap, 0) can be optimized to
4573 ap = __builtin_next_arg (0). */
4574 if (DEFAULT_ABI != ABI_V4)
4575 targetm.expand_builtin_va_start = NULL;
4576 }
4577
4578 /* If not explicitly specified via option, decide whether to generate indexed
4579 load/store instructions. A value of -1 indicates that the
4580 initial value of this variable has not been overwritten. During
4581 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4582 if (TARGET_AVOID_XFORM == -1)
4583 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4584 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4585 need indexed accesses and the type used is the scalar type of the element
4586 being loaded or stored. */
4587 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4588 && !TARGET_ALTIVEC);
4589
4590 /* Set the -mrecip options. */
4591 if (rs6000_recip_name)
4592 {
4593 char *p = ASTRDUP (rs6000_recip_name);
4594 char *q;
4595 unsigned int mask, i;
4596 bool invert;
4597
4598 while ((q = strtok (p, ",")) != NULL)
4599 {
4600 p = NULL;
4601 if (*q == '!')
4602 {
4603 invert = true;
4604 q++;
4605 }
4606 else
4607 invert = false;
4608
4609 if (!strcmp (q, "default"))
4610 mask = ((TARGET_RECIP_PRECISION)
4611 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4612 else
4613 {
4614 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4615 if (!strcmp (q, recip_options[i].string))
4616 {
4617 mask = recip_options[i].mask;
4618 break;
4619 }
4620
4621 if (i == ARRAY_SIZE (recip_options))
4622 {
4623 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4624 invert = false;
4625 mask = 0;
4626 ret = false;
4627 }
4628 }
4629
4630 if (invert)
4631 rs6000_recip_control &= ~mask;
4632 else
4633 rs6000_recip_control |= mask;
4634 }
4635 }
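
/* An example of the parsing loop above (the option string is
   illustrative):

     -mrecip=rsqrtf,!divd

   enables the float reciprocal square root estimate and, via the '!'
   prefix, clears the double-precision divide bit in
   rs6000_recip_control.  An unrecognized token is diagnosed and the
   function's return value becomes false.  */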
4636
4637 /* Set the builtin mask of the various options used that could affect which
4638 builtins were used. In the past we used target_flags, but we've run out
4639 of bits, and some options are no longer in target_flags. */
4640 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4641 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4642 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4643 rs6000_builtin_mask);
4644
4645 /* Initialize all of the registers. */
4646 rs6000_init_hard_regno_mode_ok (global_init_p);
4647
4648 /* Save the initial options in case the user uses function-specific options.  */
4649 if (global_init_p)
4650 target_option_default_node = target_option_current_node
4651 = build_target_option_node (&global_options);
4652
4653 /* If not explicitly specified via option, decide whether to generate the
4654 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4655 if (TARGET_LINK_STACK == -1)
4656 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4657
4658 /* Deprecate use of -mno-speculate-indirect-jumps. */
4659 if (!rs6000_speculate_indirect_jumps)
4660 warning (0, "%qs is deprecated and not recommended in any circumstances",
4661 "-mno-speculate-indirect-jumps");
4662
4663 return ret;
4664 }
4665
4666 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4667 define the target cpu type. */
4668
4669 static void
4670 rs6000_option_override (void)
4671 {
4672 (void) rs6000_option_override_internal (true);
4673 }
4674
4675 \f
4676 /* Implement targetm.vectorize.builtin_mask_for_load. */
4677 static tree
4678 rs6000_builtin_mask_for_load (void)
4679 {
4680 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4681 if ((TARGET_ALTIVEC && !TARGET_VSX)
4682 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4683 return altivec_builtin_mask_for_load;
4684 else
4685 return 0;
4686 }
4687
4688 /* Implement LOOP_ALIGN. */
4689 align_flags
4690 rs6000_loop_align (rtx label)
4691 {
4692 basic_block bb;
4693 int ninsns;
4694
4695 /* Don't override loop alignment if -falign-loops was specified. */
4696 if (!can_override_loop_align)
4697 return align_loops;
4698
4699 bb = BLOCK_FOR_INSN (label);
4700 ninsns = num_loop_insns (bb->loop_father);
4701
4702 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4703 if (ninsns > 4 && ninsns <= 8
4704 && (rs6000_tune == PROCESSOR_POWER4
4705 || rs6000_tune == PROCESSOR_POWER5
4706 || rs6000_tune == PROCESSOR_POWER6
4707 || rs6000_tune == PROCESSOR_POWER7
4708 || rs6000_tune == PROCESSOR_POWER8))
4709 return align_flags (5);
4710 else
4711 return align_loops;
4712 }
4713
4714 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4715 after applying N iterations.  This routine does not determine
4716 how many iterations are required to reach the desired alignment.  */
4717
4718 static bool
4719 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4720 {
4721 if (is_packed)
4722 return false;
4723
4724 if (TARGET_32BIT)
4725 {
4726 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4727 return true;
4728
4729 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4730 return true;
4731
4732 return false;
4733 }
4734 else
4735 {
4736 if (TARGET_MACHO)
4737 return false;
4738
4739 /* Assume that all other types are naturally aligned.  CHECKME!  */
4740 return true;
4741 }
4742 }
4743
4744 /* Return true if the vector misalignment factor is supported by the
4745 target. */
4746 static bool
4747 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4748 const_tree type,
4749 int misalignment,
4750 bool is_packed)
4751 {
4752 if (TARGET_VSX)
4753 {
4754 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4755 return true;
4756
4757 /* Return if movmisalign pattern is not supported for this mode. */
4758 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4759 return false;
4760
4761 if (misalignment == -1)
4762 {
4763 /* Misalignment factor is unknown at compile time but we know
4764 it's word aligned. */
4765 if (rs6000_vector_alignment_reachable (type, is_packed))
4766 {
4767 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4768
4769 if (element_size == 64 || element_size == 32)
4770 return true;
4771 }
4772
4773 return false;
4774 }
4775
4776 /* VSX supports word-aligned vectors.  */
4777 if (misalignment % 4 == 0)
4778 return true;
4779 }
4780 return false;
4781 }
4782
4783 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4784 static int
4785 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4786 tree vectype, int misalign)
4787 {
4788 unsigned elements;
4789 tree elem_type;
4790
4791 switch (type_of_cost)
4792 {
4793 case scalar_stmt:
4794 case scalar_store:
4795 case vector_stmt:
4796 case vector_store:
4797 case vec_to_scalar:
4798 case scalar_to_vec:
4799 case cond_branch_not_taken:
4800 return 1;
4801 case scalar_load:
4802 case vector_load:
4803 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4804 return 2;
4805
4806 case vec_perm:
4807 /* Power7 has only one permute unit, make it a bit expensive. */
4808 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4809 return 3;
4810 else
4811 return 1;
4812
4813 case vec_promote_demote:
4814 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4815 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4816 return 4;
4817 else
4818 return 1;
4819
4820 case cond_branch_taken:
4821 return 3;
4822
4823 case unaligned_load:
4824 case vector_gather_load:
4825 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4826 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4827 return 2;
4828
4829 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4830 {
4831 elements = TYPE_VECTOR_SUBPARTS (vectype);
4832 if (elements == 2)
4833 /* Double word aligned. */
4834 return 4;
4835
4836 if (elements == 4)
4837 {
4838 switch (misalign)
4839 {
4840 case 8:
4841 /* Double word aligned. */
4842 return 4;
4843
4844 case -1:
4845 /* Unknown misalignment. */
4846 case 4:
4847 case 12:
4848 /* Word aligned. */
4849 return 33;
4850
4851 default:
4852 gcc_unreachable ();
4853 }
4854 }
4855 }
4856
4857 if (TARGET_ALTIVEC)
4858 /* Misaligned loads are not supported. */
4859 gcc_unreachable ();
4860
4861 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4862 return 4;
4863
4864 case unaligned_store:
4865 case vector_scatter_store:
4866 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4867 return 1;
4868
4869 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4870 {
4871 elements = TYPE_VECTOR_SUBPARTS (vectype);
4872 if (elements == 2)
4873 /* Double word aligned. */
4874 return 2;
4875
4876 if (elements == 4)
4877 {
4878 switch (misalign)
4879 {
4880 case 8:
4881 /* Double word aligned. */
4882 return 2;
4883
4884 case -1:
4885 /* Unknown misalignment. */
4886 case 4:
4887 case 12:
4888 /* Word aligned. */
4889 return 23;
4890
4891 default:
4892 gcc_unreachable ();
4893 }
4894 }
4895 }
4896
4897 if (TARGET_ALTIVEC)
4898 /* Misaligned stores are not supported. */
4899 gcc_unreachable ();
4900
4901 return 2;
4902
4903 case vec_construct:
4904 /* This is a rough approximation assuming non-constant elements
4905 constructed into a vector via element insertion. FIXME:
4906 vec_construct is not granular enough for uniformly good
4907 decisions. If the initialization is a splat, this is
4908 cheaper than we estimate. Improve this someday. */
4909 elem_type = TREE_TYPE (vectype);
4910 /* 32-bit vectors loaded into registers are stored as double
4911 precision, so we need 2 permutes, 2 converts, and 1 merge
4912 to construct a vector of short floats from them. */
4913 if (SCALAR_FLOAT_TYPE_P (elem_type)
4914 && TYPE_PRECISION (elem_type) == 32)
4915 return 5;
4916 /* On POWER9, integer vector types are built up in GPRs and then
4917 use a direct move (2 cycles). For POWER8 this is even worse,
4918 as we need two direct moves and a merge, and the direct moves
4919 are five cycles. */
4920 else if (INTEGRAL_TYPE_P (elem_type))
4921 {
4922 if (TARGET_P9_VECTOR)
4923 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
4924 else
4925 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
4926 }
4927 else
4928 /* V2DFmode doesn't need a direct move. */
4929 return 2;
4930
4931 default:
4932 gcc_unreachable ();
4933 }
4934 }
4935
4936 /* Implement targetm.vectorize.preferred_simd_mode. */
4937
4938 static machine_mode
4939 rs6000_preferred_simd_mode (scalar_mode mode)
4940 {
4941 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
4942
4943 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
4944 return vmode.require ();
4945
4946 return word_mode;
4947 }
4948
4949 typedef struct _rs6000_cost_data
4950 {
4951 struct loop *loop_info;
4952 unsigned cost[3];
4953 } rs6000_cost_data;
4954
4955 /* Test for likely overcommitment of vector hardware resources. If a
4956 loop iteration is relatively large, and too large a percentage of
4957 instructions in the loop are vectorized, the cost model may not
4958 adequately reflect delays from unavailable vector resources.
4959 Penalize the loop body cost for this case. */
4960
4961 static void
4962 rs6000_density_test (rs6000_cost_data *data)
4963 {
4964 const int DENSITY_PCT_THRESHOLD = 85;
4965 const int DENSITY_SIZE_THRESHOLD = 70;
4966 const int DENSITY_PENALTY = 10;
4967 struct loop *loop = data->loop_info;
4968 basic_block *bbs = get_loop_body (loop);
4969 int nbbs = loop->num_nodes;
4970 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
4971 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4972 int i, density_pct;
4973
4974 for (i = 0; i < nbbs; i++)
4975 {
4976 basic_block bb = bbs[i];
4977 gimple_stmt_iterator gsi;
4978
4979 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4980 {
4981 gimple *stmt = gsi_stmt (gsi);
4982 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
4983
4984 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4985 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4986 not_vec_cost++;
4987 }
4988 }
4989
4990 free (bbs);
4991 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4992
4993 if (density_pct > DENSITY_PCT_THRESHOLD
4994 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4995 {
4996 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4997 if (dump_enabled_p ())
4998 dump_printf_loc (MSG_NOTE, vect_location,
4999 "density %d%%, cost %d exceeds threshold, penalizing "
5000 "loop body cost by %d%%", density_pct,
5001 vec_cost + not_vec_cost, DENSITY_PENALTY);
5002 }
5003 }
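/* Worked example (illustrative, not part of the original source): with the
   thresholds above, a loop body with vec_cost = 90 and not_vec_cost = 10
   has density_pct = 90 and total size 100, so both thresholds are exceeded
   and the body cost is raised from 90 to 90 * 110 / 100 = 99.  */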
5004
5005 /* Implement targetm.vectorize.init_cost. */
5006
5007 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5008 instruction is needed by the vectorization. */
5009 static bool rs6000_vect_nonmem;
5010
5011 static void *
5012 rs6000_init_cost (struct loop *loop_info)
5013 {
5014 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5015 data->loop_info = loop_info;
5016 data->cost[vect_prologue] = 0;
5017 data->cost[vect_body] = 0;
5018 data->cost[vect_epilogue] = 0;
5019 rs6000_vect_nonmem = false;
5020 return data;
5021 }
5022
5023 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5024 For some statements, we would like to further fine-grain tweak the cost
5025 on top of the rs6000_builtin_vectorization_cost handling, which doesn't
5026 have any information on statement operation codes etc. One typical case
5027 here is COND_EXPR: it takes the same cost as a simple FXU instruction
5028 when evaluated for scalar cost, but it should be priced higher whether it
5029 is transformed into compare + branch or compare + isel instructions. */
5030
5031 static unsigned
5032 adjust_vectorization_cost (enum vect_cost_for_stmt kind,
5033 struct _stmt_vec_info *stmt_info)
5034 {
5035 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5036 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5037 {
5038 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5039 if (subcode == COND_EXPR)
5040 return 2;
5041 }
5042
5043 return 0;
5044 }
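/* Illustrative effect (not part of the original source): a scalar COND_EXPR
   assignment is costed 1 by rs6000_builtin_vectorization_cost and then
   bumped by 2 here, so rs6000_add_stmt_cost below sees a per-statement
   cost of 3 for it, versus 1 for an ordinary scalar statement.  */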
5045
5046 /* Implement targetm.vectorize.add_stmt_cost. */
5047
5048 static unsigned
5049 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5050 struct _stmt_vec_info *stmt_info, int misalign,
5051 enum vect_cost_model_location where)
5052 {
5053 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5054 unsigned retval = 0;
5055
5056 if (flag_vect_cost_model)
5057 {
5058 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5059 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5060 misalign);
5061 stmt_cost += adjust_vectorization_cost (kind, stmt_info);
5062 /* Statements in an inner loop relative to the loop being
5063 vectorized are weighted more heavily. The value here is
5064 arbitrary and could potentially be improved with analysis. */
5065 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5066 count *= 50; /* FIXME. */
5067
5068 retval = (unsigned) (count * stmt_cost);
5069 cost_data->cost[where] += retval;
5070
5071 /* Check whether we're doing something other than just a copy loop.
5072 Not all such loops may be profitably vectorized; see
5073 rs6000_finish_cost. */
5074 if ((kind == vec_to_scalar || kind == vec_perm
5075 || kind == vec_promote_demote || kind == vec_construct
5076 || kind == scalar_to_vec)
5077 || (where == vect_body && kind == vector_stmt))
5078 rs6000_vect_nonmem = true;
5079 }
5080
5081 return retval;
5082 }
5083
5084 /* Implement targetm.vectorize.finish_cost. */
5085
5086 static void
5087 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5088 unsigned *body_cost, unsigned *epilogue_cost)
5089 {
5090 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5091
5092 if (cost_data->loop_info)
5093 rs6000_density_test (cost_data);
5094
5095 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5096 that require versioning for any reason. The vectorization is at
5097 best a wash inside the loop, and the versioning checks make
5098 profitability highly unlikely and potentially quite harmful. */
5099 if (cost_data->loop_info)
5100 {
5101 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5102 if (!rs6000_vect_nonmem
5103 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5104 && LOOP_REQUIRES_VERSIONING (vec_info))
5105 cost_data->cost[vect_body] += 10000;
5106 }
5107
5108 *prologue_cost = cost_data->cost[vect_prologue];
5109 *body_cost = cost_data->cost[vect_body];
5110 *epilogue_cost = cost_data->cost[vect_epilogue];
5111 }
5112
5113 /* Implement targetm.vectorize.destroy_cost_data. */
5114
5115 static void
5116 rs6000_destroy_cost_data (void *data)
5117 {
5118 free (data);
5119 }
5120
5121 /* Implement targetm.loop_unroll_adjust. */
5122
5123 static unsigned
5124 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5125 {
5126 if (unroll_only_small_loops)
5127 {
5128 /* TODO: This is hardcoded to 10 right now. It can be refined, for
5129 example we may want to unroll very small loops more times (4 perhaps).
5130 We also should use a PARAM for this. */
5131 if (loop->ninsns <= 10)
5132 return MIN (2, nunroll);
5133 else
5134 return 0;
5135 }
5136
5137 return nunroll;
5138 }
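/* Illustrative behavior (not part of the original source): with
   unroll_only_small_loops set, a loop of 8 insns and nunroll = 4 is
   clamped to MIN (2, 4) = 2, a loop of 12 insns is not unrolled at all
   (0 is returned), and without the flag the requested nunroll passes
   through unchanged.  */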
5139
5140 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5141 library with vectorized intrinsics. */
5142
5143 static tree
5144 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5145 tree type_in)
5146 {
5147 char name[32];
5148 const char *suffix = NULL;
5149 tree fntype, new_fndecl, bdecl = NULL_TREE;
5150 int n_args = 1;
5151 const char *bname;
5152 machine_mode el_mode, in_mode;
5153 int n, in_n;
5154
5155 /* Libmass is suitable for unsafe math only as it does not correctly support
5156 parts of IEEE with the required precision such as denormals. Only support
5157 it if we have VSX to use the simd d2 or f4 functions.
5158 XXX: Add variable length support. */
5159 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5160 return NULL_TREE;
5161
5162 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5163 n = TYPE_VECTOR_SUBPARTS (type_out);
5164 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5165 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5166 if (el_mode != in_mode
5167 || n != in_n)
5168 return NULL_TREE;
5169
5170 switch (fn)
5171 {
5172 CASE_CFN_ATAN2:
5173 CASE_CFN_HYPOT:
5174 CASE_CFN_POW:
5175 n_args = 2;
5176 gcc_fallthrough ();
5177
5178 CASE_CFN_ACOS:
5179 CASE_CFN_ACOSH:
5180 CASE_CFN_ASIN:
5181 CASE_CFN_ASINH:
5182 CASE_CFN_ATAN:
5183 CASE_CFN_ATANH:
5184 CASE_CFN_CBRT:
5185 CASE_CFN_COS:
5186 CASE_CFN_COSH:
5187 CASE_CFN_ERF:
5188 CASE_CFN_ERFC:
5189 CASE_CFN_EXP2:
5190 CASE_CFN_EXP:
5191 CASE_CFN_EXPM1:
5192 CASE_CFN_LGAMMA:
5193 CASE_CFN_LOG10:
5194 CASE_CFN_LOG1P:
5195 CASE_CFN_LOG2:
5196 CASE_CFN_LOG:
5197 CASE_CFN_SIN:
5198 CASE_CFN_SINH:
5199 CASE_CFN_SQRT:
5200 CASE_CFN_TAN:
5201 CASE_CFN_TANH:
5202 if (el_mode == DFmode && n == 2)
5203 {
5204 bdecl = mathfn_built_in (double_type_node, fn);
5205 suffix = "d2"; /* pow -> powd2 */
5206 }
5207 else if (el_mode == SFmode && n == 4)
5208 {
5209 bdecl = mathfn_built_in (float_type_node, fn);
5210 suffix = "4"; /* powf -> powf4 */
5211 }
5212 else
5213 return NULL_TREE;
5214 if (!bdecl)
5215 return NULL_TREE;
5216 break;
5217
5218 default:
5219 return NULL_TREE;
5220 }
5221
5222 gcc_assert (suffix != NULL);
5223 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5224 if (!bname)
5225 return NULL_TREE;
5226
5227 strcpy (name, bname + strlen ("__builtin_"));
5228 strcat (name, suffix);
5229
5230 if (n_args == 1)
5231 fntype = build_function_type_list (type_out, type_in, NULL);
5232 else if (n_args == 2)
5233 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5234 else
5235 gcc_unreachable ();
5236
5237 /* Build a function declaration for the vectorized function. */
5238 new_fndecl = build_decl (BUILTINS_LOCATION,
5239 FUNCTION_DECL, get_identifier (name), fntype);
5240 TREE_PUBLIC (new_fndecl) = 1;
5241 DECL_EXTERNAL (new_fndecl) = 1;
5242 DECL_IS_NOVOPS (new_fndecl) = 1;
5243 TREE_READONLY (new_fndecl) = 1;
5244
5245 return new_fndecl;
5246 }
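/* Naming example (illustrative, not part of the original source): for a
   V2DF pow, BDECL is __builtin_pow, so NAME becomes "pow" + "d2" = "powd2";
   for a V4SF powf it becomes "powf" + "4" = "powf4", matching the MASS
   simd d2/f4 entry points mentioned above.  */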
5247
5248 /* Returns a function decl for a vectorized version of the builtin function
5249 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5250 if it is not available. */
5251
5252 static tree
5253 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5254 tree type_in)
5255 {
5256 machine_mode in_mode, out_mode;
5257 int in_n, out_n;
5258
5259 if (TARGET_DEBUG_BUILTIN)
5260 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5261 combined_fn_name (combined_fn (fn)),
5262 GET_MODE_NAME (TYPE_MODE (type_out)),
5263 GET_MODE_NAME (TYPE_MODE (type_in)));
5264
5265 if (TREE_CODE (type_out) != VECTOR_TYPE
5266 || TREE_CODE (type_in) != VECTOR_TYPE)
5267 return NULL_TREE;
5268
5269 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5270 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5271 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5272 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5273
5274 switch (fn)
5275 {
5276 CASE_CFN_COPYSIGN:
5277 if (VECTOR_UNIT_VSX_P (V2DFmode)
5278 && out_mode == DFmode && out_n == 2
5279 && in_mode == DFmode && in_n == 2)
5280 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5281 if (VECTOR_UNIT_VSX_P (V4SFmode)
5282 && out_mode == SFmode && out_n == 4
5283 && in_mode == SFmode && in_n == 4)
5284 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5285 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5286 && out_mode == SFmode && out_n == 4
5287 && in_mode == SFmode && in_n == 4)
5288 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5289 break;
5290 CASE_CFN_CEIL:
5291 if (VECTOR_UNIT_VSX_P (V2DFmode)
5292 && out_mode == DFmode && out_n == 2
5293 && in_mode == DFmode && in_n == 2)
5294 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5295 if (VECTOR_UNIT_VSX_P (V4SFmode)
5296 && out_mode == SFmode && out_n == 4
5297 && in_mode == SFmode && in_n == 4)
5298 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5299 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5300 && out_mode == SFmode && out_n == 4
5301 && in_mode == SFmode && in_n == 4)
5302 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5303 break;
5304 CASE_CFN_FLOOR:
5305 if (VECTOR_UNIT_VSX_P (V2DFmode)
5306 && out_mode == DFmode && out_n == 2
5307 && in_mode == DFmode && in_n == 2)
5308 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5309 if (VECTOR_UNIT_VSX_P (V4SFmode)
5310 && out_mode == SFmode && out_n == 4
5311 && in_mode == SFmode && in_n == 4)
5312 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5313 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5314 && out_mode == SFmode && out_n == 4
5315 && in_mode == SFmode && in_n == 4)
5316 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5317 break;
5318 CASE_CFN_FMA:
5319 if (VECTOR_UNIT_VSX_P (V2DFmode)
5320 && out_mode == DFmode && out_n == 2
5321 && in_mode == DFmode && in_n == 2)
5322 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5323 if (VECTOR_UNIT_VSX_P (V4SFmode)
5324 && out_mode == SFmode && out_n == 4
5325 && in_mode == SFmode && in_n == 4)
5326 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5327 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5328 && out_mode == SFmode && out_n == 4
5329 && in_mode == SFmode && in_n == 4)
5330 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5331 break;
5332 CASE_CFN_TRUNC:
5333 if (VECTOR_UNIT_VSX_P (V2DFmode)
5334 && out_mode == DFmode && out_n == 2
5335 && in_mode == DFmode && in_n == 2)
5336 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5337 if (VECTOR_UNIT_VSX_P (V4SFmode)
5338 && out_mode == SFmode && out_n == 4
5339 && in_mode == SFmode && in_n == 4)
5340 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5341 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5342 && out_mode == SFmode && out_n == 4
5343 && in_mode == SFmode && in_n == 4)
5344 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5345 break;
5346 CASE_CFN_NEARBYINT:
5347 if (VECTOR_UNIT_VSX_P (V2DFmode)
5348 && flag_unsafe_math_optimizations
5349 && out_mode == DFmode && out_n == 2
5350 && in_mode == DFmode && in_n == 2)
5351 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5352 if (VECTOR_UNIT_VSX_P (V4SFmode)
5353 && flag_unsafe_math_optimizations
5354 && out_mode == SFmode && out_n == 4
5355 && in_mode == SFmode && in_n == 4)
5356 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5357 break;
5358 CASE_CFN_RINT:
5359 if (VECTOR_UNIT_VSX_P (V2DFmode)
5360 && !flag_trapping_math
5361 && out_mode == DFmode && out_n == 2
5362 && in_mode == DFmode && in_n == 2)
5363 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5364 if (VECTOR_UNIT_VSX_P (V4SFmode)
5365 && !flag_trapping_math
5366 && out_mode == SFmode && out_n == 4
5367 && in_mode == SFmode && in_n == 4)
5368 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5369 break;
5370 default:
5371 break;
5372 }
5373
5374 /* Generate calls to libmass if appropriate. */
5375 if (rs6000_veclib_handler)
5376 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5377
5378 return NULL_TREE;
5379 }
5380
5381 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5382
5383 static tree
5384 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5385 tree type_in)
5386 {
5387 machine_mode in_mode, out_mode;
5388 int in_n, out_n;
5389
5390 if (TARGET_DEBUG_BUILTIN)
5391 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5392 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5393 GET_MODE_NAME (TYPE_MODE (type_out)),
5394 GET_MODE_NAME (TYPE_MODE (type_in)));
5395
5396 if (TREE_CODE (type_out) != VECTOR_TYPE
5397 || TREE_CODE (type_in) != VECTOR_TYPE)
5398 return NULL_TREE;
5399
5400 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5401 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5402 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5403 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5404
5405 enum rs6000_builtins fn
5406 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5407 switch (fn)
5408 {
5409 case RS6000_BUILTIN_RSQRTF:
5410 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5411 && out_mode == SFmode && out_n == 4
5412 && in_mode == SFmode && in_n == 4)
5413 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5414 break;
5415 case RS6000_BUILTIN_RSQRT:
5416 if (VECTOR_UNIT_VSX_P (V2DFmode)
5417 && out_mode == DFmode && out_n == 2
5418 && in_mode == DFmode && in_n == 2)
5419 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5420 break;
5421 case RS6000_BUILTIN_RECIPF:
5422 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5423 && out_mode == SFmode && out_n == 4
5424 && in_mode == SFmode && in_n == 4)
5425 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5426 break;
5427 case RS6000_BUILTIN_RECIP:
5428 if (VECTOR_UNIT_VSX_P (V2DFmode)
5429 && out_mode == DFmode && out_n == 2
5430 && in_mode == DFmode && in_n == 2)
5431 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5432 break;
5433 default:
5434 break;
5435 }
5436 return NULL_TREE;
5437 }
5438 \f
5439 /* Default CPU string for rs6000*_file_start functions. */
5440 static const char *rs6000_default_cpu;
5441
5442 #ifdef USING_ELFOS_H
5443 const char *rs6000_machine;
5444
5445 const char *
5446 rs6000_machine_from_flags (void)
5447 {
5448 HOST_WIDE_INT flags = rs6000_isa_flags;
5449
5450 /* Disable the flags that should never influence the .machine selection. */
5451 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5452
5453 if ((flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5454 return "future";
5455 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5456 return "power9";
5457 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5458 return "power8";
5459 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5460 return "power7";
5461 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5462 return "power6";
5463 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5464 return "power5";
5465 if ((flags & ISA_2_1_MASKS) != 0)
5466 return "power4";
5467 if ((flags & OPTION_MASK_POWERPC64) != 0)
5468 return "ppc64";
5469 return "ppc";
5470 }
5471
5472 void
5473 emit_asm_machine (void)
5474 {
5475 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5476 }
5477 #endif
5478
5479 /* Do anything needed at the start of the asm file. */
5480
5481 static void
5482 rs6000_file_start (void)
5483 {
5484 char buffer[80];
5485 const char *start = buffer;
5486 FILE *file = asm_out_file;
5487
5488 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5489
5490 default_file_start ();
5491
5492 if (flag_verbose_asm)
5493 {
5494 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5495
5496 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5497 {
5498 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5499 start = "";
5500 }
5501
5502 if (global_options_set.x_rs6000_cpu_index)
5503 {
5504 fprintf (file, "%s -mcpu=%s", start,
5505 processor_target_table[rs6000_cpu_index].name);
5506 start = "";
5507 }
5508
5509 if (global_options_set.x_rs6000_tune_index)
5510 {
5511 fprintf (file, "%s -mtune=%s", start,
5512 processor_target_table[rs6000_tune_index].name);
5513 start = "";
5514 }
5515
5516 if (PPC405_ERRATUM77)
5517 {
5518 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5519 start = "";
5520 }
5521
5522 #ifdef USING_ELFOS_H
5523 switch (rs6000_sdata)
5524 {
5525 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5526 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5527 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5528 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5529 }
5530
5531 if (rs6000_sdata && g_switch_value)
5532 {
5533 fprintf (file, "%s -G %d", start,
5534 g_switch_value);
5535 start = "";
5536 }
5537 #endif
5538
5539 if (*start == '\0')
5540 putc ('\n', file);
5541 }
5542
5543 #ifdef USING_ELFOS_H
5544 rs6000_machine = rs6000_machine_from_flags ();
5545 emit_asm_machine ();
5546 #endif
5547
5548 if (DEFAULT_ABI == ABI_ELFv2)
5549 fprintf (file, "\t.abiversion 2\n");
5550 }
5551
5552 \f
5553 /* Return nonzero if this function is known to have a null epilogue. */
5554
5555 int
5556 direct_return (void)
5557 {
5558 if (reload_completed)
5559 {
5560 rs6000_stack_t *info = rs6000_stack_info ();
5561
5562 if (info->first_gp_reg_save == 32
5563 && info->first_fp_reg_save == 64
5564 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5565 && ! info->lr_save_p
5566 && ! info->cr_save_p
5567 && info->vrsave_size == 0
5568 && ! info->push_p)
5569 return 1;
5570 }
5571
5572 return 0;
5573 }
5574
5575 /* Helper for num_insns_constant. Calculate number of instructions to
5576 load VALUE to a single gpr using combinations of addi, addis, ori,
5577 oris and sldi instructions. */
5578
5579 static int
5580 num_insns_constant_gpr (HOST_WIDE_INT value)
5581 {
5582 /* signed constant loadable with addi */
5583 if (SIGNED_INTEGER_16BIT_P (value))
5584 return 1;
5585
5586 /* constant loadable with addis */
5587 else if ((value & 0xffff) == 0
5588 && (value >> 31 == -1 || value >> 31 == 0))
5589 return 1;
5590
5591 /* PADDI can support up to 34 bit signed integers. */
5592 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5593 return 1;
5594
5595 else if (TARGET_POWERPC64)
5596 {
5597 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5598 HOST_WIDE_INT high = value >> 31;
5599
5600 if (high == 0 || high == -1)
5601 return 2;
5602
5603 high >>= 1;
5604
5605 if (low == 0)
5606 return num_insns_constant_gpr (high) + 1;
5607 else if (high == 0)
5608 return num_insns_constant_gpr (low) + 1;
5609 else
5610 return (num_insns_constant_gpr (high)
5611 + num_insns_constant_gpr (low) + 1);
5612 }
5613
5614 else
5615 return 2;
5616 }
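/* Worked examples (illustrative, not part of the original source):
     0x1234              -> 1 insn  (li; signed 16-bit value)
     0x12345678          -> 2 insns (lis 0x1234; ori 0x5678)
     0x100000000         -> 2 insns (li 1; sldi 32, since the low word is 0)
     0x123456789abcdef0  -> 5 insns (lis/ori for the high word, sldi 32,
                                     then oris/ori for the low word).  */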
5617
5618 /* Helper for num_insns_constant. Allow constants formed by the
5619 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5620 and handle modes that require multiple gprs. */
5621
5622 static int
5623 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5624 {
5625 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5626 int total = 0;
5627 while (nregs-- > 0)
5628 {
5629 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5630 int insns = num_insns_constant_gpr (low);
5631 if (insns > 2
5632 /* We won't get more than 2 from num_insns_constant_gpr
5633 except when TARGET_POWERPC64 and mode is DImode or
5634 wider, so the register mode must be DImode. */
5635 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5636 insns = 2;
5637 total += insns;
5638 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
5639 it all at once would be UB. */
5640 value >>= (BITS_PER_WORD - 1);
5641 value >>= 1;
5642 }
5643 return total;
5644 }
5645
5646 /* Return the number of instructions it takes to form a constant in as
5647 many gprs as are needed for MODE. */
5648
5649 int
5650 num_insns_constant (rtx op, machine_mode mode)
5651 {
5652 HOST_WIDE_INT val;
5653
5654 switch (GET_CODE (op))
5655 {
5656 case CONST_INT:
5657 val = INTVAL (op);
5658 break;
5659
5660 case CONST_WIDE_INT:
5661 {
5662 int insns = 0;
5663 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5664 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5665 DImode);
5666 return insns;
5667 }
5668
5669 case CONST_DOUBLE:
5670 {
5671 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5672
5673 if (mode == SFmode || mode == SDmode)
5674 {
5675 long l;
5676
5677 if (mode == SDmode)
5678 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5679 else
5680 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5681 /* See the first define_split in rs6000.md handling a
5682 const_double_operand. */
5683 val = l;
5684 mode = SImode;
5685 }
5686 else if (mode == DFmode || mode == DDmode)
5687 {
5688 long l[2];
5689
5690 if (mode == DDmode)
5691 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5692 else
5693 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5694
5695 /* See the second (32-bit) and third (64-bit) define_split
5696 in rs6000.md handling a const_double_operand. */
5697 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5698 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5699 mode = DImode;
5700 }
5701 else if (mode == TFmode || mode == TDmode
5702 || mode == KFmode || mode == IFmode)
5703 {
5704 long l[4];
5705 int insns;
5706
5707 if (mode == TDmode)
5708 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5709 else
5710 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5711
5712 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5713 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5714 insns = num_insns_constant_multi (val, DImode);
5715 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5716 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5717 insns += num_insns_constant_multi (val, DImode);
5718 return insns;
5719 }
5720 else
5721 gcc_unreachable ();
5722 }
5723 break;
5724
5725 default:
5726 gcc_unreachable ();
5727 }
5728
5729 return num_insns_constant_multi (val, mode);
5730 }
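/* Worked example (illustrative, not part of the original source): DFmode
   1.0 has the image 0x3ff0000000000000, so on a 64-bit target it goes
   through num_insns_constant_multi as a single DImode piece and costs
   2 insns (lis 0x3ff0; sldi 32).  */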
5731
5732 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5733 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5734 corresponding element of the vector, but for V4SFmode, the
5735 corresponding "float" is interpreted as an SImode integer. */
5736
5737 HOST_WIDE_INT
5738 const_vector_elt_as_int (rtx op, unsigned int elt)
5739 {
5740 rtx tmp;
5741
5742 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5743 gcc_assert (GET_MODE (op) != V2DImode
5744 && GET_MODE (op) != V2DFmode);
5745
5746 tmp = CONST_VECTOR_ELT (op, elt);
5747 if (GET_MODE (op) == V4SFmode)
5748 tmp = gen_lowpart (SImode, tmp);
5749 return INTVAL (tmp);
5750 }
5751
5752 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5753 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5754 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5755 all items are set to the same value and contain COPIES replicas of the
5756 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5757 operand and the others are set to the value of the operand's msb. */
5758
5759 static bool
5760 vspltis_constant (rtx op, unsigned step, unsigned copies)
5761 {
5762 machine_mode mode = GET_MODE (op);
5763 machine_mode inner = GET_MODE_INNER (mode);
5764
5765 unsigned i;
5766 unsigned nunits;
5767 unsigned bitsize;
5768 unsigned mask;
5769
5770 HOST_WIDE_INT val;
5771 HOST_WIDE_INT splat_val;
5772 HOST_WIDE_INT msb_val;
5773
5774 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5775 return false;
5776
5777 nunits = GET_MODE_NUNITS (mode);
5778 bitsize = GET_MODE_BITSIZE (inner);
5779 mask = GET_MODE_MASK (inner);
5780
5781 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5782 splat_val = val;
5783 msb_val = val >= 0 ? 0 : -1;
5784
5785 /* Construct the value to be splatted, if possible. If not, return 0. */
5786 for (i = 2; i <= copies; i *= 2)
5787 {
5788 HOST_WIDE_INT small_val;
5789 bitsize /= 2;
5790 small_val = splat_val >> bitsize;
5791 mask >>= bitsize;
5792 if (splat_val != ((HOST_WIDE_INT)
5793 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5794 | (small_val & mask)))
5795 return false;
5796 splat_val = small_val;
5797 }
5798
5799 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5800 if (EASY_VECTOR_15 (splat_val))
5801 ;
5802
5803 /* Also check if we can splat, and then add the result to itself. Do so if
5804 the value is positive, or if the splat instruction is using OP's mode;
5805 for splat_val < 0, the splat and the add should use the same mode. */
5806 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5807 && (splat_val >= 0 || (step == 1 && copies == 1)))
5808 ;
5809
5810 /* Also check if we are loading up the most significant bit which can be done by
5811 loading up -1 and shifting the value left by -1. */
5812 else if (EASY_VECTOR_MSB (splat_val, inner))
5813 ;
5814
5815 else
5816 return false;
5817
5818 /* Check if VAL is present in every STEP-th element, and the
5819 other elements are filled with its most significant bit. */
5820 for (i = 1; i < nunits; ++i)
5821 {
5822 HOST_WIDE_INT desired_val;
5823 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5824 if ((i & (step - 1)) == 0)
5825 desired_val = val;
5826 else
5827 desired_val = msb_val;
5828
5829 if (desired_val != const_vector_elt_as_int (op, elt))
5830 return false;
5831 }
5832
5833 return true;
5834 }
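/* Examples (illustrative, not part of the original source): the V4SImode
   constant with every element 0x0c0c0c0c succeeds with STEP = 1,
   COPIES = 4, i.e. it is vspltisb 12 viewed as V4SImode.  The V8HImode
   constant { -1, -5, -1, -5, -1, -5, -1, -5 } (big-endian element order)
   succeeds with STEP = 2, COPIES = 1: it is vspltisw -5 viewed as
   V8HImode, each word supplying -5 in its low half and the sign bits
   (-1) in its high half.  */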
5835
5836 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5837 instruction, filling in the bottom elements with 0 or -1.
5838
5839 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5840 for the number of zeroes to shift in, or negative for the number of 0xff
5841 bytes to shift in.
5842
5843 OP is a CONST_VECTOR. */
5844
5845 int
5846 vspltis_shifted (rtx op)
5847 {
5848 machine_mode mode = GET_MODE (op);
5849 machine_mode inner = GET_MODE_INNER (mode);
5850
5851 unsigned i, j;
5852 unsigned nunits;
5853 unsigned mask;
5854
5855 HOST_WIDE_INT val;
5856
5857 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5858 return false;
5859
5860 /* We need to create pseudo registers to do the shift, so don't recognize
5861 shift vector constants after reload. */
5862 if (!can_create_pseudo_p ())
5863 return false;
5864
5865 nunits = GET_MODE_NUNITS (mode);
5866 mask = GET_MODE_MASK (inner);
5867
5868 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5869
5870 /* Check if the value can really be the operand of a vspltis[bhw]. */
5871 if (EASY_VECTOR_15 (val))
5872 ;
5873
5874 /* Also check if we are loading up the most significant bit which can be done
5875 by loading up -1 and shifting the value left by -1. */
5876 else if (EASY_VECTOR_MSB (val, inner))
5877 ;
5878
5879 else
5880 return 0;
5881
5882 /* Check if VAL is present in every STEP-th element until we find elements
5883 that are 0 or all 1 bits. */
5884 for (i = 1; i < nunits; ++i)
5885 {
5886 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5887 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5888
5889 /* If the value isn't the splat value, check for the remaining elements
5890 being 0/-1. */
5891 if (val != elt_val)
5892 {
5893 if (elt_val == 0)
5894 {
5895 for (j = i+1; j < nunits; ++j)
5896 {
5897 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5898 if (const_vector_elt_as_int (op, elt2) != 0)
5899 return 0;
5900 }
5901
5902 return (nunits - i) * GET_MODE_SIZE (inner);
5903 }
5904
5905 else if ((elt_val & mask) == mask)
5906 {
5907 for (j = i+1; j < nunits; ++j)
5908 {
5909 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5910 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5911 return 0;
5912 }
5913
5914 return -((nunits - i) * GET_MODE_SIZE (inner));
5915 }
5916
5917 else
5918 return 0;
5919 }
5920 }
5921
5922 /* If all elements are equal, we don't need to do VSLDOI. */
5923 return 0;
5924 }
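/* Example (illustrative, not part of the original source): the V16QImode
   constant { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0 } in
   big-endian element order returns 4: generate vspltisb 5, then VSLDOI
   shifts in 4 zero bytes.  A trailing run of 0xff bytes instead would
   return -4.  */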
5925
5926
5927 /* Return true if OP is of the given MODE and can be synthesized
5928 with a vspltisb, vspltish or vspltisw. */
5929
5930 bool
5931 easy_altivec_constant (rtx op, machine_mode mode)
5932 {
5933 unsigned step, copies;
5934
5935 if (mode == VOIDmode)
5936 mode = GET_MODE (op);
5937 else if (mode != GET_MODE (op))
5938 return false;
5939
5940 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5941 constants. */
5942 if (mode == V2DFmode)
5943 return zero_constant (op, mode);
5944
5945 else if (mode == V2DImode)
5946 {
5947 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
5948 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
5949 return false;
5950
5951 if (zero_constant (op, mode))
5952 return true;
5953
5954 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5955 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5956 return true;
5957
5958 return false;
5959 }
5960
5961 /* V1TImode is a special container for TImode. Ignore for now. */
5962 else if (mode == V1TImode)
5963 return false;
5964
5965 /* Start with a vspltisw. */
5966 step = GET_MODE_NUNITS (mode) / 4;
5967 copies = 1;
5968
5969 if (vspltis_constant (op, step, copies))
5970 return true;
5971
5972 /* Then try with a vspltish. */
5973 if (step == 1)
5974 copies <<= 1;
5975 else
5976 step >>= 1;
5977
5978 if (vspltis_constant (op, step, copies))
5979 return true;
5980
5981 /* And finally a vspltisb. */
5982 if (step == 1)
5983 copies <<= 1;
5984 else
5985 step >>= 1;
5986
5987 if (vspltis_constant (op, step, copies))
5988 return true;
5989
5990 if (vspltis_shifted (op) != 0)
5991 return true;
5992
5993 return false;
5994 }
5995
5996 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5997 result is OP. Abort if it is not possible. */
5998
5999 rtx
6000 gen_easy_altivec_constant (rtx op)
6001 {
6002 machine_mode mode = GET_MODE (op);
6003 int nunits = GET_MODE_NUNITS (mode);
6004 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6005 unsigned step = nunits / 4;
6006 unsigned copies = 1;
6007
6008 /* Start with a vspltisw. */
6009 if (vspltis_constant (op, step, copies))
6010 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6011
6012 /* Then try with a vspltish. */
6013 if (step == 1)
6014 copies <<= 1;
6015 else
6016 step >>= 1;
6017
6018 if (vspltis_constant (op, step, copies))
6019 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6020
6021 /* And finally a vspltisb. */
6022 if (step == 1)
6023 copies <<= 1;
6024 else
6025 step >>= 1;
6026
6027 if (vspltis_constant (op, step, copies))
6028 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6029
6030 gcc_unreachable ();
6031 }
6032
6033 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6034 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6035
6036 Return the number of instructions needed (1 or 2) through the address
6037 pointed to by NUM_INSNS_PTR.
6038
6039 Return the constant that is being split via CONSTANT_PTR. */
6040
6041 bool
6042 xxspltib_constant_p (rtx op,
6043 machine_mode mode,
6044 int *num_insns_ptr,
6045 int *constant_ptr)
6046 {
6047 size_t nunits = GET_MODE_NUNITS (mode);
6048 size_t i;
6049 HOST_WIDE_INT value;
6050 rtx element;
6051
6052 /* Set the returned values to out of bound values. */
6053 *num_insns_ptr = -1;
6054 *constant_ptr = 256;
6055
6056 if (!TARGET_P9_VECTOR)
6057 return false;
6058
6059 if (mode == VOIDmode)
6060 mode = GET_MODE (op);
6061
6062 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6063 return false;
6064
6065 /* Handle (vec_duplicate <constant>). */
6066 if (GET_CODE (op) == VEC_DUPLICATE)
6067 {
6068 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6069 && mode != V2DImode)
6070 return false;
6071
6072 element = XEXP (op, 0);
6073 if (!CONST_INT_P (element))
6074 return false;
6075
6076 value = INTVAL (element);
6077 if (!IN_RANGE (value, -128, 127))
6078 return false;
6079 }
6080
6081 /* Handle (const_vector [...]). */
6082 else if (GET_CODE (op) == CONST_VECTOR)
6083 {
6084 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6085 && mode != V2DImode)
6086 return false;
6087
6088 element = CONST_VECTOR_ELT (op, 0);
6089 if (!CONST_INT_P (element))
6090 return false;
6091
6092 value = INTVAL (element);
6093 if (!IN_RANGE (value, -128, 127))
6094 return false;
6095
6096 for (i = 1; i < nunits; i++)
6097 {
6098 element = CONST_VECTOR_ELT (op, i);
6099 if (!CONST_INT_P (element))
6100 return false;
6101
6102 if (value != INTVAL (element))
6103 return false;
6104 }
6105 }
6106
6107 /* Handle integer constants being loaded into the upper part of the VSX
6108 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6109 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6110 else if (CONST_INT_P (op))
6111 {
6112 if (!SCALAR_INT_MODE_P (mode))
6113 return false;
6114
6115 value = INTVAL (op);
6116 if (!IN_RANGE (value, -128, 127))
6117 return false;
6118
6119 if (!IN_RANGE (value, -1, 0))
6120 {
6121 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6122 return false;
6123
6124 if (EASY_VECTOR_15 (value))
6125 return false;
6126 }
6127 }
6128
6129 else
6130 return false;
6131
6132 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6133 sign extend. Special case 0/-1 to allow getting any VSX register instead
6134 of an Altivec register. */
6135 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6136 && EASY_VECTOR_15 (value))
6137 return false;
6138
6139 /* Return # of instructions and the constant byte for XXSPLTIB. */
6140 if (mode == V16QImode)
6141 *num_insns_ptr = 1;
6142
6143 else if (IN_RANGE (value, -1, 0))
6144 *num_insns_ptr = 1;
6145
6146 else
6147 *num_insns_ptr = 2;
6148
6149 *constant_ptr = (int) value;
6150 return true;
6151 }
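/* Examples (illustrative, not part of the original source): a V16QImode
   splat of 100 needs 1 insn (xxspltib 100); a V4SImode splat of 100 needs
   2 (xxspltib 100 + vextsb2w); a V4SImode splat of 12 returns false,
   since EASY_VECTOR_15 means a single vspltisw 12 is preferred.  */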
6152
6153 const char *
6154 output_vec_const_move (rtx *operands)
6155 {
6156 int shift;
6157 machine_mode mode;
6158 rtx dest, vec;
6159
6160 dest = operands[0];
6161 vec = operands[1];
6162 mode = GET_MODE (dest);
6163
6164 if (TARGET_VSX)
6165 {
6166 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6167 int xxspltib_value = 256;
6168 int num_insns = -1;
6169
6170 if (zero_constant (vec, mode))
6171 {
6172 if (TARGET_P9_VECTOR)
6173 return "xxspltib %x0,0";
6174
6175 else if (dest_vmx_p)
6176 return "vspltisw %0,0";
6177
6178 else
6179 return "xxlxor %x0,%x0,%x0";
6180 }
6181
6182 if (all_ones_constant (vec, mode))
6183 {
6184 if (TARGET_P9_VECTOR)
6185 return "xxspltib %x0,255";
6186
6187 else if (dest_vmx_p)
6188 return "vspltisw %0,-1";
6189
6190 else if (TARGET_P8_VECTOR)
6191 return "xxlorc %x0,%x0,%x0";
6192
6193 else
6194 gcc_unreachable ();
6195 }
6196
6197 if (TARGET_P9_VECTOR
6198 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6199 {
6200 if (num_insns == 1)
6201 {
6202 operands[2] = GEN_INT (xxspltib_value & 0xff);
6203 return "xxspltib %x0,%2";
6204 }
6205
6206 return "#";
6207 }
6208 }
6209
6210 if (TARGET_ALTIVEC)
6211 {
6212 rtx splat_vec;
6213
6214 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6215 if (zero_constant (vec, mode))
6216 return "vspltisw %0,0";
6217
6218 if (all_ones_constant (vec, mode))
6219 return "vspltisw %0,-1";
6220
6221 /* Do we need to construct a value using VSLDOI? */
6222 shift = vspltis_shifted (vec);
6223 if (shift != 0)
6224 return "#";
6225
6226 splat_vec = gen_easy_altivec_constant (vec);
6227 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6228 operands[1] = XEXP (splat_vec, 0);
6229 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6230 return "#";
6231
6232 switch (GET_MODE (splat_vec))
6233 {
6234 case E_V4SImode:
6235 return "vspltisw %0,%1";
6236
6237 case E_V8HImode:
6238 return "vspltish %0,%1";
6239
6240 case E_V16QImode:
6241 return "vspltisb %0,%1";
6242
6243 default:
6244 gcc_unreachable ();
6245 }
6246 }
6247
6248 gcc_unreachable ();
6249 }
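/* Example output (illustrative, not part of the original source): an
   all-zero V4SImode constant emits "xxspltib %x0,0" on ISA 3.0,
   "vspltisw %0,0" when the destination is an Altivec register, and
   "xxlxor %x0,%x0,%x0" for the remaining VSX registers; returning "#"
   asks the splitter to expand a multi-insn sequence such as
   xxspltib + sign extend or splat + VSLDOI.  */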
6250
6251 /* Initialize vector TARGET to VALS. */
6252
6253 void
6254 rs6000_expand_vector_init (rtx target, rtx vals)
6255 {
6256 machine_mode mode = GET_MODE (target);
6257 machine_mode inner_mode = GET_MODE_INNER (mode);
6258 int n_elts = GET_MODE_NUNITS (mode);
6259 int n_var = 0, one_var = -1;
6260 bool all_same = true, all_const_zero = true;
6261 rtx x, mem;
6262 int i;
6263
6264 for (i = 0; i < n_elts; ++i)
6265 {
6266 x = XVECEXP (vals, 0, i);
6267 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6268 ++n_var, one_var = i;
6269 else if (x != CONST0_RTX (inner_mode))
6270 all_const_zero = false;
6271
6272 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6273 all_same = false;
6274 }
6275
6276 if (n_var == 0)
6277 {
6278 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6279 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6280 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6281 {
6282 /* Zero register. */
6283 emit_move_insn (target, CONST0_RTX (mode));
6284 return;
6285 }
6286 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6287 {
6288 /* Splat immediate. */
6289 emit_insn (gen_rtx_SET (target, const_vec));
6290 return;
6291 }
6292 else
6293 {
6294 /* Load from constant pool. */
6295 emit_move_insn (target, const_vec);
6296 return;
6297 }
6298 }
6299
6300 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6301 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6302 {
6303 rtx op[2];
6304 size_t i;
6305 size_t num_elements = all_same ? 1 : 2;
6306 for (i = 0; i < num_elements; i++)
6307 {
6308 op[i] = XVECEXP (vals, 0, i);
6309 /* Just in case there is a SUBREG with a smaller mode, do a
6310 conversion. */
6311 if (GET_MODE (op[i]) != inner_mode)
6312 {
6313 rtx tmp = gen_reg_rtx (inner_mode);
6314 convert_move (tmp, op[i], 0);
6315 op[i] = tmp;
6316 }
6317 /* Allow load with splat double word. */
6318 else if (MEM_P (op[i]))
6319 {
6320 if (!all_same)
6321 op[i] = force_reg (inner_mode, op[i]);
6322 }
6323 else if (!REG_P (op[i]))
6324 op[i] = force_reg (inner_mode, op[i]);
6325 }
6326
6327 if (all_same)
6328 {
6329 if (mode == V2DFmode)
6330 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6331 else
6332 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6333 }
6334 else
6335 {
6336 if (mode == V2DFmode)
6337 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6338 else
6339 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6340 }
6341 return;
6342 }
6343
6344 /* Special case initializing vector int if we are on 64-bit systems with
6345 direct move or we have the ISA 3.0 instructions. */
6346 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6347 && TARGET_DIRECT_MOVE_64BIT)
6348 {
6349 if (all_same)
6350 {
6351 rtx element0 = XVECEXP (vals, 0, 0);
6352 if (MEM_P (element0))
6353 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6354 else
6355 element0 = force_reg (SImode, element0);
6356
6357 if (TARGET_P9_VECTOR)
6358 emit_insn (gen_vsx_splat_v4si (target, element0));
6359 else
6360 {
6361 rtx tmp = gen_reg_rtx (DImode);
6362 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6363 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6364 }
6365 return;
6366 }
6367 else
6368 {
6369 rtx elements[4];
6370 size_t i;
6371
6372 for (i = 0; i < 4; i++)
6373 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6374
6375 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6376 elements[2], elements[3]));
6377 return;
6378 }
6379 }
6380
6381 /* With single-precision floating point on VSX, we know that internally
6382 single precision is actually represented as a double, so either make 2
6383 V2DF vectors and convert those vectors to single precision, or do one
6384 conversion and splat the result to the other elements. */
6385 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6386 {
6387 if (all_same)
6388 {
6389 rtx element0 = XVECEXP (vals, 0, 0);
6390
6391 if (TARGET_P9_VECTOR)
6392 {
6393 if (MEM_P (element0))
6394 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6395
6396 emit_insn (gen_vsx_splat_v4sf (target, element0));
6397 }
6398
6399 else
6400 {
6401 rtx freg = gen_reg_rtx (V4SFmode);
6402 rtx sreg = force_reg (SFmode, element0);
6403 rtx cvt = (TARGET_XSCVDPSPN
6404 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6405 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6406
6407 emit_insn (cvt);
6408 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6409 const0_rtx));
6410 }
6411 }
6412 else
6413 {
6414 rtx dbl_even = gen_reg_rtx (V2DFmode);
6415 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6416 rtx flt_even = gen_reg_rtx (V4SFmode);
6417 rtx flt_odd = gen_reg_rtx (V4SFmode);
6418 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6419 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6420 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6421 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6422
6423 /* Use VMRGEW if we can instead of doing a permute. */
6424 if (TARGET_P8_VECTOR)
6425 {
6426 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6427 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6428 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6429 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6430 if (BYTES_BIG_ENDIAN)
6431 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6432 else
6433 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6434 }
6435 else
6436 {
6437 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6438 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6439 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6440 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6441 rs6000_expand_extract_even (target, flt_even, flt_odd);
6442 }
6443 }
6444 return;
6445 }
6446
6447 /* Special case initializing vector short/char that are splats if we are on
6448 64-bit systems with direct move. */
6449 if (all_same && TARGET_DIRECT_MOVE_64BIT
6450 && (mode == V16QImode || mode == V8HImode))
6451 {
6452 rtx op0 = XVECEXP (vals, 0, 0);
6453 rtx di_tmp = gen_reg_rtx (DImode);
6454
6455 if (!REG_P (op0))
6456 op0 = force_reg (GET_MODE_INNER (mode), op0);
6457
6458 if (mode == V16QImode)
6459 {
6460 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6461 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6462 return;
6463 }
6464
6465 if (mode == V8HImode)
6466 {
6467 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6468 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6469 return;
6470 }
6471 }
6472
6473 /* Store value to stack temp. Load vector element. Splat. However, splat
6474 of 64-bit items is not supported on Altivec. */
6475 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6476 {
6477 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6478 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6479 XVECEXP (vals, 0, 0));
6480 x = gen_rtx_UNSPEC (VOIDmode,
6481 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6482 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6483 gen_rtvec (2,
6484 gen_rtx_SET (target, mem),
6485 x)));
6486 x = gen_rtx_VEC_SELECT (inner_mode, target,
6487 gen_rtx_PARALLEL (VOIDmode,
6488 gen_rtvec (1, const0_rtx)));
6489 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6490 return;
6491 }
6492
6493 /* One field is non-constant. Load constant then overwrite
6494 varying field. */
6495 if (n_var == 1)
6496 {
6497 rtx copy = copy_rtx (vals);
6498
6499 /* Load constant part of vector, substitute neighboring value for
6500 varying element. */
6501 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6502 rs6000_expand_vector_init (target, copy);
6503
6504 /* Insert variable. */
6505 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6506 return;
6507 }
6508
6509 /* Construct the vector in memory one field at a time
6510 and load the whole vector. */
6511 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6512 for (i = 0; i < n_elts; i++)
6513 emit_move_insn (adjust_address_nv (mem, inner_mode,
6514 i * GET_MODE_SIZE (inner_mode)),
6515 XVECEXP (vals, 0, i));
6516 emit_move_insn (target, mem);
6517 }
6518
6519 /* Set field ELT of TARGET to VAL. */
6520
6521 void
6522 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6523 {
6524 machine_mode mode = GET_MODE (target);
6525 machine_mode inner_mode = GET_MODE_INNER (mode);
6526 rtx reg = gen_reg_rtx (mode);
6527 rtx mask, mem, x;
6528 int width = GET_MODE_SIZE (inner_mode);
6529 int i;
6530
6531 val = force_reg (GET_MODE (val), val);
6532
6533 if (VECTOR_MEM_VSX_P (mode))
6534 {
6535 rtx insn = NULL_RTX;
6536 rtx elt_rtx = GEN_INT (elt);
6537
6538 if (mode == V2DFmode)
6539 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6540
6541 else if (mode == V2DImode)
6542 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6543
6544 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6545 {
6546 if (mode == V4SImode)
6547 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6548 else if (mode == V8HImode)
6549 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6550 else if (mode == V16QImode)
6551 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6552 else if (mode == V4SFmode)
6553 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6554 }
6555
6556 if (insn)
6557 {
6558 emit_insn (insn);
6559 return;
6560 }
6561 }
6562
6563 /* Simplify setting single element vectors like V1TImode. */
6564 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6565 {
6566 emit_move_insn (target, gen_lowpart (mode, val));
6567 return;
6568 }
6569
6570 /* Load single variable value. */
6571 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6572 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6573 x = gen_rtx_UNSPEC (VOIDmode,
6574 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6575 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6576 gen_rtvec (2,
6577 gen_rtx_SET (reg, mem),
6578 x)));
6579
6580 /* Linear sequence. */
6581 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6582 for (i = 0; i < 16; ++i)
6583 XVECEXP (mask, 0, i) = GEN_INT (i);
6584
6585 /* Set permute mask to insert element into target. */
6586 for (i = 0; i < width; ++i)
6587 XVECEXP (mask, 0, elt*width + i)
6588 = GEN_INT (i + 0x10);
6589 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6590
6591 if (BYTES_BIG_ENDIAN)
6592 x = gen_rtx_UNSPEC (mode,
6593 gen_rtvec (3, target, reg,
6594 force_reg (V16QImode, x)),
6595 UNSPEC_VPERM);
6596 else
6597 {
6598 if (TARGET_P9_VECTOR)
6599 x = gen_rtx_UNSPEC (mode,
6600 gen_rtvec (3, reg, target,
6601 force_reg (V16QImode, x)),
6602 UNSPEC_VPERMR);
6603 else
6604 {
6605 /* Invert selector. We prefer to generate VNAND on P8 so
6606 that future fusion opportunities can kick in, but must
6607 generate VNOR elsewhere. */
6608 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6609 rtx iorx = (TARGET_P8_VECTOR
6610 ? gen_rtx_IOR (V16QImode, notx, notx)
6611 : gen_rtx_AND (V16QImode, notx, notx));
6612 rtx tmp = gen_reg_rtx (V16QImode);
6613 emit_insn (gen_rtx_SET (tmp, iorx));
6614
6615 /* Permute with operands reversed and adjusted selector. */
6616 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6617 UNSPEC_VPERM);
6618 }
6619 }
6620
6621 emit_insn (gen_rtx_SET (target, x));
6622 }
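/* Permute-mask example (illustrative, not part of the original source):
   for V4SImode with ELT = 1, the selector built above is
     { 0, 1, 2, 3, 0x10, 0x11, 0x12, 0x13, 8, 9, 10, 11, 12, 13, 14, 15 }
   so the big-endian VPERM copies bytes 4..7 from REG (holding the
   splatted new value) and everything else from TARGET.  */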
6623
6624 /* Extract field ELT from VEC into TARGET. */
6625
6626 void
6627 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6628 {
6629 machine_mode mode = GET_MODE (vec);
6630 machine_mode inner_mode = GET_MODE_INNER (mode);
6631 rtx mem;
6632
6633 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6634 {
6635 switch (mode)
6636 {
6637 default:
6638 break;
6639 case E_V1TImode:
6640 emit_move_insn (target, gen_lowpart (TImode, vec));
6641 break;
6642 case E_V2DFmode:
6643 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6644 return;
6645 case E_V2DImode:
6646 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6647 return;
6648 case E_V4SFmode:
6649 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6650 return;
6651 case E_V16QImode:
6652 if (TARGET_DIRECT_MOVE_64BIT)
6653 {
6654 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6655 return;
6656 }
6657 else
6658 break;
6659 case E_V8HImode:
6660 if (TARGET_DIRECT_MOVE_64BIT)
6661 {
6662 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6663 return;
6664 }
6665 else
6666 break;
6667 case E_V4SImode:
6668 if (TARGET_DIRECT_MOVE_64BIT)
6669 {
6670 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6671 return;
6672 }
6673 break;
6674 }
6675 }
6676 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6677 && TARGET_DIRECT_MOVE_64BIT)
6678 {
6679 if (GET_MODE (elt) != DImode)
6680 {
6681 rtx tmp = gen_reg_rtx (DImode);
6682 convert_move (tmp, elt, 0);
6683 elt = tmp;
6684 }
6685 else if (!REG_P (elt))
6686 elt = force_reg (DImode, elt);
6687
6688 switch (mode)
6689 {
6690 case E_V1TImode:
6691 emit_move_insn (target, gen_lowpart (TImode, vec));
6692 return;
6693
6694 case E_V2DFmode:
6695 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6696 return;
6697
6698 case E_V2DImode:
6699 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6700 return;
6701
6702 case E_V4SFmode:
6703 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6704 return;
6705
6706 case E_V4SImode:
6707 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6708 return;
6709
6710 case E_V8HImode:
6711 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6712 return;
6713
6714 case E_V16QImode:
6715 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6716 return;
6717
6718 default:
6719 gcc_unreachable ();
6720 }
6721 }
6722
6723 /* Allocate mode-sized buffer. */
6724 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6725
6726 emit_move_insn (mem, vec);
6727 if (CONST_INT_P (elt))
6728 {
6729 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6730
6731 /* Add offset to field within buffer matching vector element. */
6732 mem = adjust_address_nv (mem, inner_mode,
6733 modulo_elt * GET_MODE_SIZE (inner_mode));
6734 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6735 }
6736 else
6737 {
6738 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6739 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6740 rtx new_addr = gen_reg_rtx (Pmode);
6741
6742 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6743 if (ele_size > 1)
6744 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6745 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6746 new_addr = change_address (mem, inner_mode, new_addr);
6747 emit_move_insn (target, new_addr);
6748 }
6749 }
6750
6751 /* Return the offset within a memory object (MEM) of a vector type to a given
6752 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
6753 the element is constant, we return a constant integer.
6754
6755 Otherwise, we use a base register temporary to calculate the offset after
6756 masking it to fit within the bounds of the vector and scaling it. The
6757 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
6758 built-in function. */
6759
6760 static rtx
6761 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
6762 {
6763 if (CONST_INT_P (element))
6764 return GEN_INT (INTVAL (element) * scalar_size);
6765
6766 /* All insns should use the 'Q' constraint (address is a single register) if
6767 the element number is not a constant. */
6768 gcc_assert (satisfies_constraint_Q (mem));
6769
6770 /* Mask the element to make sure the element number is between 0 and the
6771 maximum number of elements - 1 so that we don't generate an address
6772 outside the vector. */
6773 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
6774 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
6775 emit_insn (gen_rtx_SET (base_tmp, and_op));
6776
6777 /* Shift the element to get the byte offset from the element number. */
6778 int shift = exact_log2 (scalar_size);
6779 gcc_assert (shift >= 0);
6780
6781 if (shift > 0)
6782 {
6783 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
6784 emit_insn (gen_rtx_SET (base_tmp, shift_op));
6785 }
6786
6787 return base_tmp;
6788 }
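/* Example (illustrative, not part of the original source): for a V8HImode
   vector and a variable ELEMENT, the code above emits the equivalent of
   "base_tmp = element & 7" (clamp to the 8 elements) followed by
   "base_tmp = base_tmp << 1" (scale by the 2-byte element size), and
   returns BASE_TMP as the byte offset.  */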
6789
6790 /* Helper function to update PC-relative addresses when we are adjusting a
6791 memory address (ADDR) of a vector to point to a scalar field within it with
6792 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
6793 use the base register temporary (BASE_TMP) to form the address. */
6794
6795 static rtx
6796 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
6797 {
6798 rtx new_addr = NULL;
6799
6800 gcc_assert (CONST_INT_P (element_offset));
6801
6802 if (GET_CODE (addr) == CONST)
6803 addr = XEXP (addr, 0);
6804
6805 if (GET_CODE (addr) == PLUS)
6806 {
6807 rtx op0 = XEXP (addr, 0);
6808 rtx op1 = XEXP (addr, 1);
6809
6810 if (CONST_INT_P (op1))
6811 {
6812 HOST_WIDE_INT offset
6813 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
6814
6815 if (offset == 0)
6816 new_addr = op0;
6817
6818 else
6819 {
6820 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
6821 new_addr = gen_rtx_CONST (Pmode, plus);
6822 }
6823 }
6824
6825 else
6826 {
6827 emit_move_insn (base_tmp, addr);
6828 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6829 }
6830 }
6831
6832 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
6833 {
6834 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
6835 new_addr = gen_rtx_CONST (Pmode, plus);
6836 }
6837
6838 else
6839 gcc_unreachable ();
6840
6841 return new_addr;
6842 }
6843
6844 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6845 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6846 temporary (BASE_TMP) to fixup the address. Return the new memory address
6847 that is valid for reads or writes to a given register (SCALAR_REG).
6848
6849 This function is expected to be called after reload is completed when we are
6850 splitting insns. The temporary BASE_TMP might be set multiple times with
6851 this code. */
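/* For instance (illustrative), if MEM is (mem:V4SI (plus (reg r3)
   (const_int 32))) and ELEMENT is (const_int 2) with SImode elements, the
   element offset is 8 and the result is (mem:SI (plus (reg r3)
   (const_int 40))), provided that offset is valid for the load form being
   used; otherwise the new address is first moved into BASE_TMP.  */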
6852
6853 rtx
6854 rs6000_adjust_vec_address (rtx scalar_reg,
6855 rtx mem,
6856 rtx element,
6857 rtx base_tmp,
6858 machine_mode scalar_mode)
6859 {
6860 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6861 rtx addr = XEXP (mem, 0);
6862 rtx new_addr;
6863
6864 gcc_assert (!reg_mentioned_p (base_tmp, addr));
6865 gcc_assert (!reg_mentioned_p (base_tmp, element));
6866
6867 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6868 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6869
6870 /* Calculate what we need to add to the address to get the element
6871 address. */
6872 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
6873
6874 /* Create the new address pointing to the element within the vector. If we
6875 are adding 0, we don't have to change the address. */
6876 if (element_offset == const0_rtx)
6877 new_addr = addr;
6878
6879 /* A simple indirect address can be converted into a reg + offset
6880 address. */
6881 else if (REG_P (addr) || SUBREG_P (addr))
6882 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6883
6884 /* For references to local static variables, fold a constant offset into the
6885 address. */
6886 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
6887 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
6888
6889 /* Optimize D-FORM addresses with a constant offset and a constant element
6890    number, folding the element offset into the address directly.  */
6891 else if (GET_CODE (addr) == PLUS)
6892 {
6893 rtx op0 = XEXP (addr, 0);
6894 rtx op1 = XEXP (addr, 1);
6895
6896 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6897 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6898 {
6899 /* op0 should never be r0, because r0+offset is not valid. But it
6900 doesn't hurt to make sure it is not r0. */
6901 gcc_assert (reg_or_subregno (op0) != 0);
6902
6903 /* D-FORM address with constant element number. */
6904 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6905 rtx offset_rtx = GEN_INT (offset);
6906 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6907 }
6908 else
6909 {
6910 /* If we don't have a D-FORM address with a constant element number,
6911 add the two elements in the current address. Then add the offset.
6912
6913 Previously, we tried to add the offset to OP1 and change the
6914 address to an X-FORM format adding OP0 and BASE_TMP, but it became
6915 complicated because we had to verify that op1 was not GPR0 and we
6916 had a constant element offset (due to the way ADDI is defined).
6917 By doing the add of OP0 and OP1 first, and then adding in the
6918 offset, it has the benefit that if D-FORM instructions are
6919 allowed, the offset is part of the memory access to the vector
6920 element. */
6921 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
6922 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6923 }
6924 }
6925
6926 else
6927 {
6928 emit_move_insn (base_tmp, addr);
6929 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6930 }
6931
6932 /* If the address isn't valid, move the address into the temporary base
6933 register. Some reasons it could not be valid include:
6934
6935    The address offset overflowed the 16-bit or 34-bit offset field;
6936    We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
6937    We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
6938    Only X-FORM loads can be done, and the address is D-FORM.  */
6939
6940 enum insn_form iform
6941 = address_to_insn_form (new_addr, scalar_mode,
6942 reg_to_non_prefixed (scalar_reg, scalar_mode));
6943
6944 if (iform == INSN_FORM_BAD)
6945 {
6946 emit_move_insn (base_tmp, new_addr);
6947 new_addr = base_tmp;
6948 }
6949
6950 return change_address (mem, scalar_mode, new_addr);
6951 }
6952
6953 /* Split a variable vec_extract operation into the component instructions. */
6954
6955 void
6956 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6957 rtx tmp_altivec)
6958 {
6959 machine_mode mode = GET_MODE (src);
6960 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6961 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6962 int byte_shift = exact_log2 (scalar_size);
6963
6964 gcc_assert (byte_shift >= 0);
6965
6966 /* If we are given a memory address, optimize to load just the element. We
6967 don't have to adjust the vector element number on little endian
6968 systems. */
6969 if (MEM_P (src))
6970 {
6971 emit_move_insn (dest,
6972 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
6973 scalar_mode));
6974 return;
6975 }
6976
6977 else if (REG_P (src) || SUBREG_P (src))
6978 {
6979 int num_elements = GET_MODE_NUNITS (mode);
6980 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
6981 int bit_shift = 7 - exact_log2 (num_elements);
6982 rtx element2;
6983 unsigned int dest_regno = reg_or_subregno (dest);
6984 unsigned int src_regno = reg_or_subregno (src);
6985 unsigned int element_regno = reg_or_subregno (element);
6986
6987 gcc_assert (REG_P (tmp_gpr));
6988
6989 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
6990 a general purpose register. */
6991 if (TARGET_P9_VECTOR
6992 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
6993 && INT_REGNO_P (dest_regno)
6994 && ALTIVEC_REGNO_P (src_regno)
6995 && INT_REGNO_P (element_regno))
6996 {
6997 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
6998 rtx element_si = gen_rtx_REG (SImode, element_regno);
6999
7000 if (mode == V16QImode)
7001 emit_insn (BYTES_BIG_ENDIAN
7002 ? gen_vextublx (dest_si, element_si, src)
7003 : gen_vextubrx (dest_si, element_si, src));
7004
7005 else if (mode == V8HImode)
7006 {
7007 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7008 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7009 emit_insn (BYTES_BIG_ENDIAN
7010 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7011 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7012 }
7013
7014
7015 else
7016 {
7017 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7018 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7019 emit_insn (BYTES_BIG_ENDIAN
7020 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7021 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7022 }
7023
7024 return;
7025 }
7026
7027
7028 gcc_assert (REG_P (tmp_altivec));
7029
7030 /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
7031    an XOR, otherwise we need to subtract.  The shift amount is chosen so
7032    that VSLO will shift the element into the upper position (adding 3 to
7033    convert a byte shift into a bit shift).  */
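      /* For example (illustrative), for V4SImode on little endian with
	 element number N, the code below computes (3 - (N & 3)) << 5; the
	 masked element number is mirrored, then shifted by
	 bit_shift = 3 + log2 (element size) = 5 so the byte offset lands in
	 the bit positions VSLO reads.  */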
7034 if (scalar_size == 8)
7035 {
7036 if (!BYTES_BIG_ENDIAN)
7037 {
7038 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7039 element2 = tmp_gpr;
7040 }
7041 else
7042 element2 = element;
7043
7044 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7045 bit. */
7046 emit_insn (gen_rtx_SET (tmp_gpr,
7047 gen_rtx_AND (DImode,
7048 gen_rtx_ASHIFT (DImode,
7049 element2,
7050 GEN_INT (6)),
7051 GEN_INT (64))));
7052 }
7053 else
7054 {
7055 if (!BYTES_BIG_ENDIAN)
7056 {
7057 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7058
7059 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7060 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7061 element2 = tmp_gpr;
7062 }
7063 else
7064 element2 = element;
7065
7066 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7067 }
7068
7069 /* Get the value into the lower byte of the Altivec register where VSLO
7070 expects it. */
7071 if (TARGET_P9_VECTOR)
7072 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7073 else if (can_create_pseudo_p ())
7074 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7075 else
7076 {
7077 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7078 emit_move_insn (tmp_di, tmp_gpr);
7079 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7080 }
7081
7082 /* Do the VSLO to get the value into the final location. */
7083 switch (mode)
7084 {
7085 case E_V2DFmode:
7086 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7087 return;
7088
7089 case E_V2DImode:
7090 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7091 return;
7092
7093 case E_V4SFmode:
7094 {
7095 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7096 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7097 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7098 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7099 tmp_altivec));
7100
7101 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7102 return;
7103 }
7104
7105 case E_V4SImode:
7106 case E_V8HImode:
7107 case E_V16QImode:
7108 {
7109 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7110 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7111 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7112 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7113 tmp_altivec));
7114 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7115 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7116 GEN_INT (64 - bits_in_element)));
7117 return;
7118 }
7119
7120 default:
7121 gcc_unreachable ();
7122 }
7123
7124 return;
7125 }
7126 else
7127 gcc_unreachable ();
7128 }
7129
7130 /* Return the alignment of TYPE.  The existing alignment is ALIGN.  HOW
7131    selects whether the returned alignment is ABI-mandated, optional, or
7132    both ABI-mandated and optional.  */
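/* For example, a vector type is raised to 128-bit alignment whenever ABI
   alignment is requested, while an array of QImode elements is raised to
   word alignment only as an optional (performance) alignment.  */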
7133
7134 unsigned int
7135 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7136 {
7137 if (how != align_opt)
7138 {
7139 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7140 align = 128;
7141 }
7142
7143 if (how != align_abi)
7144 {
7145 if (TREE_CODE (type) == ARRAY_TYPE
7146 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7147 {
7148 if (align < BITS_PER_WORD)
7149 align = BITS_PER_WORD;
7150 }
7151 }
7152
7153 return align;
7154 }
7155
7156 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7157 instructions simply ignore the low bits; VSX memory instructions
7158 are aligned to 4 or 8 bytes. */
7159
7160 static bool
7161 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7162 {
7163 return (STRICT_ALIGNMENT
7164 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7165 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7166 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7167 && (int) align < VECTOR_ALIGN (mode)))));
7168 }
7169
7170 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7171
7172 bool
7173 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7174 {
7175 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7176 {
7177 if (computed != 128)
7178 {
7179 static bool warned;
7180 if (!warned && warn_psabi)
7181 {
7182 warned = true;
7183 inform (input_location,
7184 "the layout of aggregates containing vectors with"
7185 " %d-byte alignment has changed in GCC 5",
7186 computed / BITS_PER_UNIT);
7187 }
7188 }
7189 /* In current GCC there is no special case. */
7190 return false;
7191 }
7192
7193 return false;
7194 }
7195
7196 /* AIX increases natural record alignment to doubleword if the first
7197 field is an FP double while the FP fields remain word aligned. */
7198
7199 unsigned int
7200 rs6000_special_round_type_align (tree type, unsigned int computed,
7201 unsigned int specified)
7202 {
7203 unsigned int align = MAX (computed, specified);
7204 tree field = TYPE_FIELDS (type);
7205
7206   /* Skip all non-field decls.  */
7207 while (field != NULL
7208 && (TREE_CODE (field) != FIELD_DECL
7209 || DECL_FIELD_ABI_IGNORED (field)))
7210 field = DECL_CHAIN (field);
7211
7212 if (field != NULL && field != type)
7213 {
7214 type = TREE_TYPE (field);
7215 while (TREE_CODE (type) == ARRAY_TYPE)
7216 type = TREE_TYPE (type);
7217
7218 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7219 align = MAX (align, 64);
7220 }
7221
7222 return align;
7223 }
7224
7225 /* Darwin increases record alignment to the natural alignment of
7226 the first field. */
7227
7228 unsigned int
7229 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7230 unsigned int specified)
7231 {
7232 unsigned int align = MAX (computed, specified);
7233
7234 if (TYPE_PACKED (type))
7235 return align;
7236
7237 /* Find the first field, looking down into aggregates. */
7238 do {
7239 tree field = TYPE_FIELDS (type);
7240     /* Skip all non-field decls.  */
7241 while (field != NULL
7242 && (TREE_CODE (field) != FIELD_DECL
7243 || DECL_FIELD_ABI_IGNORED (field)))
7244 field = DECL_CHAIN (field);
7245 if (! field)
7246 break;
7247 /* A packed field does not contribute any extra alignment. */
7248 if (DECL_PACKED (field))
7249 return align;
7250 type = TREE_TYPE (field);
7251 while (TREE_CODE (type) == ARRAY_TYPE)
7252 type = TREE_TYPE (type);
7253 } while (AGGREGATE_TYPE_P (type));
7254
7255 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7256 align = MAX (align, TYPE_ALIGN (type));
7257
7258 return align;
7259 }
7260
7261 /* Return 1 for an operand in small memory on V.4/eabi. */
7262
7263 int
7264 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7265 machine_mode mode ATTRIBUTE_UNUSED)
7266 {
7267 #if TARGET_ELF
7268 rtx sym_ref;
7269
7270 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7271 return 0;
7272
7273 if (DEFAULT_ABI != ABI_V4)
7274 return 0;
7275
7276 if (SYMBOL_REF_P (op))
7277 sym_ref = op;
7278
7279 else if (GET_CODE (op) != CONST
7280 || GET_CODE (XEXP (op, 0)) != PLUS
7281 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7282 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7283 return 0;
7284
7285 else
7286 {
7287 rtx sum = XEXP (op, 0);
7288 HOST_WIDE_INT summand;
7289
7290 /* We have to be careful here, because it is the referenced address
7291          that must be within 32k of _SDA_BASE_, not just the symbol.  */
7292 summand = INTVAL (XEXP (sum, 1));
7293 if (summand < 0 || summand > g_switch_value)
7294 return 0;
7295
7296 sym_ref = XEXP (sum, 0);
7297 }
7298
7299 return SYMBOL_REF_SMALL_P (sym_ref);
7300 #else
7301 return 0;
7302 #endif
7303 }
7304
7305 /* Return true if either operand is a general purpose register. */
7306
7307 bool
7308 gpr_or_gpr_p (rtx op0, rtx op1)
7309 {
7310 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7311 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7312 }
7313
7314 /* Return true if this is a move direct operation between GPR registers and
7315 floating point/VSX registers. */
7316
7317 bool
7318 direct_move_p (rtx op0, rtx op1)
7319 {
7320 if (!REG_P (op0) || !REG_P (op1))
7321 return false;
7322
7323 if (!TARGET_DIRECT_MOVE)
7324 return false;
7325
7326 int regno0 = REGNO (op0);
7327 int regno1 = REGNO (op1);
7328 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7329 return false;
7330
7331 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7332 return true;
7333
7334 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7335 return true;
7336
7337 return false;
7338 }
7339
7340 /* Return true if ADDR is an acceptable address for a quad memory operation
7341    of mode MODE (either LQ/STQ for general purpose registers, or LXV/STXV
7342    for vector registers under ISA 3.0).  STRICT is true if the registers in
7343    the address must be validated strictly, as is required once reload has
7344    completed.  */
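/* For instance (illustrative), with a 16-byte vector mode,
   (plus (reg r4) (const_int 48)) is acceptable because 48 is a 16-byte
   aligned offset that fits the DQ displacement field, while
   (plus (reg r4) (const_int 50)) is not a valid DQ-form offset and would
   need a prefixed instruction or an X-form address instead.  */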
7345
7346 bool
7347 quad_address_p (rtx addr, machine_mode mode, bool strict)
7348 {
7349 rtx op0, op1;
7350
7351 if (GET_MODE_SIZE (mode) != 16)
7352 return false;
7353
7354 if (legitimate_indirect_address_p (addr, strict))
7355 return true;
7356
7357 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7358 return false;
7359
7360 /* Is this a valid prefixed address? If the bottom four bits of the offset
7361 are non-zero, we could use a prefixed instruction (which does not have the
7362 DQ-form constraint that the traditional instruction had) instead of
7363 forcing the unaligned offset to a GPR. */
7364 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
7365 return true;
7366
7367 if (GET_CODE (addr) != PLUS)
7368 return false;
7369
7370 op0 = XEXP (addr, 0);
7371 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7372 return false;
7373
7374 op1 = XEXP (addr, 1);
7375 if (!CONST_INT_P (op1))
7376 return false;
7377
7378 return quad_address_offset_p (INTVAL (op1));
7379 }
7380
7381 /* Return true if this is a load or store quad operation. This function does
7382 not handle the atomic quad memory instructions. */
7383
7384 bool
7385 quad_load_store_p (rtx op0, rtx op1)
7386 {
7387 bool ret;
7388
7389 if (!TARGET_QUAD_MEMORY)
7390 ret = false;
7391
7392 else if (REG_P (op0) && MEM_P (op1))
7393 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7394 && quad_memory_operand (op1, GET_MODE (op1))
7395 && !reg_overlap_mentioned_p (op0, op1));
7396
7397 else if (MEM_P (op0) && REG_P (op1))
7398 ret = (quad_memory_operand (op0, GET_MODE (op0))
7399 && quad_int_reg_operand (op1, GET_MODE (op1)));
7400
7401 else
7402 ret = false;
7403
7404 if (TARGET_DEBUG_ADDR)
7405 {
7406 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7407 ret ? "true" : "false");
7408 debug_rtx (gen_rtx_SET (op0, op1));
7409 }
7410
7411 return ret;
7412 }
7413
7414 /* Given an address, return a constant offset term if one exists. */
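/* For example, (plus (reg r9) (const_int 64)) yields (const_int 64), and
   (lo_sum (reg r9) (const (plus (symbol_ref "s") (const_int 8)))) yields
   (const_int 8); an address with no constant term yields NULL_RTX.  */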
7415
7416 static rtx
7417 address_offset (rtx op)
7418 {
7419 if (GET_CODE (op) == PRE_INC
7420 || GET_CODE (op) == PRE_DEC)
7421 op = XEXP (op, 0);
7422 else if (GET_CODE (op) == PRE_MODIFY
7423 || GET_CODE (op) == LO_SUM)
7424 op = XEXP (op, 1);
7425
7426 if (GET_CODE (op) == CONST)
7427 op = XEXP (op, 0);
7428
7429 if (GET_CODE (op) == PLUS)
7430 op = XEXP (op, 1);
7431
7432 if (CONST_INT_P (op))
7433 return op;
7434
7435 return NULL_RTX;
7436 }
7437
7438 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7439 the mode. If we can't find (or don't know) the alignment of the symbol
7440 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7441 should be pessimistic]. Offsets are validated in the same way as for
7442 reg + offset. */
7443 static bool
7444 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
7445 {
7446   /* We should never be called with a DQ-form (quad) mode.  */
7447 gcc_checking_assert (! mode_supports_dq_form (mode));
7448
7449 if (GET_CODE (x) == CONST)
7450 x = XEXP (x, 0);
7451
7452 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
7453 x = XVECEXP (x, 0, 0);
7454
7455 rtx sym = NULL_RTX;
7456 unsigned HOST_WIDE_INT offset = 0;
7457
7458 if (GET_CODE (x) == PLUS)
7459 {
7460 sym = XEXP (x, 0);
7461 if (! SYMBOL_REF_P (sym))
7462 return false;
7463 if (!CONST_INT_P (XEXP (x, 1)))
7464 return false;
7465 offset = INTVAL (XEXP (x, 1));
7466 }
7467 else if (SYMBOL_REF_P (x))
7468 sym = x;
7469 else if (CONST_INT_P (x))
7470 offset = INTVAL (x);
7471 else if (GET_CODE (x) == LABEL_REF)
7472 offset = 0; // We assume code labels are Pmode aligned
7473 else
7474 return false; // not sure what we have here.
7475
7476 /* If we don't know the alignment of the thing to which the symbol refers,
7477 we assume optimistically it is "enough".
7478 ??? maybe we should be pessimistic instead. */
7479 unsigned align = 0;
7480
7481 if (sym)
7482 {
7483 tree decl = SYMBOL_REF_DECL (sym);
7484 #if TARGET_MACHO
7485 if (MACHO_SYMBOL_INDIRECTION_P (sym))
7486 /* The decl in an indirection symbol is the original one, which might
7487 be less aligned than the indirection. Our indirections are always
7488 pointer-aligned. */
7489 ;
7490 else
7491 #endif
7492 if (decl && DECL_ALIGN (decl))
7493 align = DECL_ALIGN_UNIT (decl);
7494 }
7495
7496 unsigned int extra = 0;
7497 switch (mode)
7498 {
7499 case E_DFmode:
7500 case E_DDmode:
7501 case E_DImode:
7502 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7503 addressing. */
7504 if (VECTOR_MEM_VSX_P (mode))
7505 return false;
7506
7507 if (!TARGET_POWERPC64)
7508 extra = 4;
7509 else if ((offset & 3) || (align & 3))
7510 return false;
7511 break;
7512
7513 case E_TFmode:
7514 case E_IFmode:
7515 case E_KFmode:
7516 case E_TDmode:
7517 case E_TImode:
7518 case E_PTImode:
7519 extra = 8;
7520 if (!TARGET_POWERPC64)
7521 extra = 12;
7522 else if ((offset & 3) || (align & 3))
7523 return false;
7524 break;
7525
7526 default:
7527 break;
7528 }
7529
7530 /* We only care if the access(es) would cause a change to the high part. */
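  /* The expression below sign-extends the low 16 bits: for example
     (illustrative), an offset of 0x9000 becomes
     ((0x9000 & 0xffff) ^ 0x8000) - 0x8000 = -28672, its value as a signed
     16-bit displacement.  */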
7531 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7532 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7533 }
7534
7535 /* Return true if the MEM operand is a memory operand suitable for use
7536 with a (full width, possibly multiple) gpr load/store. On
7537 powerpc64 this means the offset must be divisible by 4.
7538 Implements 'Y' constraint.
7539
7540 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7541 a constraint function we know the operand has satisfied a suitable
7542 memory predicate.
7543
7544 Offsetting a lo_sum should not be allowed, except where we know by
7545 alignment that a 32k boundary is not crossed. Note that by
7546 "offsetting" here we mean a further offset to access parts of the
7547 MEM. It's fine to have a lo_sum where the inner address is offset
7548 from a sym, since the same sym+offset will appear in the high part
7549 of the address calculation. */
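/* As an example, on powerpc64 a DImode access uses the DS-form ld/std
   instructions, whose displacement field has its low 2 bits implied zero:
   (mem:DI (plus (reg r3) (const_int 8))) satisfies 'Y', while
   (mem:DI (plus (reg r3) (const_int 6))) does not (unless a prefixed
   instruction can be used instead).  */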
7550
7551 bool
7552 mem_operand_gpr (rtx op, machine_mode mode)
7553 {
7554 unsigned HOST_WIDE_INT offset;
7555 int extra;
7556 rtx addr = XEXP (op, 0);
7557
7558 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7559 if (TARGET_UPDATE
7560 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7561 && mode_supports_pre_incdec_p (mode)
7562 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7563 return true;
7564
7565 /* Allow prefixed instructions if supported. If the bottom two bits of the
7566 offset are non-zero, we could use a prefixed instruction (which does not
7567 have the DS-form constraint that the traditional instruction had) instead
7568 of forcing the unaligned offset to a GPR. */
7569 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7570 return true;
7571
7572 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
7573 really OK. Doing this early avoids teaching all the other machinery
7574 about them. */
7575 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
7576 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
7577
7578 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
7579 if (!rs6000_offsettable_memref_p (op, mode, false))
7580 return false;
7581
7582 op = address_offset (addr);
7583 if (op == NULL_RTX)
7584 return true;
7585
7586 offset = INTVAL (op);
7587 if (TARGET_POWERPC64 && (offset & 3) != 0)
7588 return false;
7589
7590 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7591 if (extra < 0)
7592 extra = 0;
7593
7594 if (GET_CODE (addr) == LO_SUM)
7595 /* For lo_sum addresses, we must allow any offset except one that
7596 causes a wrap, so test only the low 16 bits. */
7597 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7598
7599 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7600 }
7601
7602 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7603 enforce an offset divisible by 4 even for 32-bit. */
7604
7605 bool
7606 mem_operand_ds_form (rtx op, machine_mode mode)
7607 {
7608 unsigned HOST_WIDE_INT offset;
7609 int extra;
7610 rtx addr = XEXP (op, 0);
7611
7612 /* Allow prefixed instructions if supported. If the bottom two bits of the
7613 offset are non-zero, we could use a prefixed instruction (which does not
7614 have the DS-form constraint that the traditional instruction had) instead
7615 of forcing the unaligned offset to a GPR. */
7616 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7617 return true;
7618
7619 if (!offsettable_address_p (false, mode, addr))
7620 return false;
7621
7622 op = address_offset (addr);
7623 if (op == NULL_RTX)
7624 return true;
7625
7626 offset = INTVAL (op);
7627 if ((offset & 3) != 0)
7628 return false;
7629
7630 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7631 if (extra < 0)
7632 extra = 0;
7633
7634 if (GET_CODE (addr) == LO_SUM)
7635 /* For lo_sum addresses, we must allow any offset except one that
7636 causes a wrap, so test only the low 16 bits. */
7637 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7638
7639 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7640 }
7641 \f
7642 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7643
7644 static bool
7645 reg_offset_addressing_ok_p (machine_mode mode)
7646 {
7647 switch (mode)
7648 {
7649 case E_V16QImode:
7650 case E_V8HImode:
7651 case E_V4SFmode:
7652 case E_V4SImode:
7653 case E_V2DFmode:
7654 case E_V2DImode:
7655 case E_V1TImode:
7656 case E_TImode:
7657 case E_TFmode:
7658 case E_KFmode:
7659 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7660 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7661 a vector mode, if we want to use the VSX registers to move it around,
7662 we need to restrict ourselves to reg+reg addressing. Similarly for
7663 IEEE 128-bit floating point that is passed in a single vector
7664 register. */
7665 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7666 return mode_supports_dq_form (mode);
7667 break;
7668
7669 case E_SDmode:
7670 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7671 addressing for the LFIWZX and STFIWX instructions. */
7672 if (TARGET_NO_SDMODE_STACK)
7673 return false;
7674 break;
7675
7676 default:
7677 break;
7678 }
7679
7680 return true;
7681 }
7682
7683 static bool
7684 virtual_stack_registers_memory_p (rtx op)
7685 {
7686 int regnum;
7687
7688 if (REG_P (op))
7689 regnum = REGNO (op);
7690
7691 else if (GET_CODE (op) == PLUS
7692 && REG_P (XEXP (op, 0))
7693 && CONST_INT_P (XEXP (op, 1)))
7694 regnum = REGNO (XEXP (op, 0));
7695
7696 else
7697 return false;
7698
7699 return (regnum >= FIRST_VIRTUAL_REGISTER
7700 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7701 }
7702
7703 /* Return true if a MODE sized memory access to OP plus OFFSET
7704 is known to not straddle a 32k boundary. This function is used
7705 to determine whether -mcmodel=medium code can use TOC pointer
7706 relative addressing for OP. This means the alignment of the TOC
7707 pointer must also be taken into account, and unfortunately that is
7708 only 8 bytes. */
7709
7710 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7711 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7712 #endif
7713
7714 static bool
7715 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7716 machine_mode mode)
7717 {
7718 tree decl;
7719 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7720
7721 if (!SYMBOL_REF_P (op))
7722 return false;
7723
7724 /* ISA 3.0 vector d-form addressing is restricted, don't allow
7725 SYMBOL_REF. */
7726 if (mode_supports_dq_form (mode))
7727 return false;
7728
7729 dsize = GET_MODE_SIZE (mode);
7730 decl = SYMBOL_REF_DECL (op);
7731 if (!decl)
7732 {
7733 if (dsize == 0)
7734 return false;
7735
7736 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7737 replacing memory addresses with an anchor plus offset. We
7738 could find the decl by rummaging around in the block->objects
7739 VEC for the given offset but that seems like too much work. */
7740 dalign = BITS_PER_UNIT;
7741 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7742 && SYMBOL_REF_ANCHOR_P (op)
7743 && SYMBOL_REF_BLOCK (op) != NULL)
7744 {
7745 struct object_block *block = SYMBOL_REF_BLOCK (op);
7746
7747 dalign = block->alignment;
7748 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7749 }
7750 else if (CONSTANT_POOL_ADDRESS_P (op))
7751 {
7752       /* It would be nice to have get_pool_align()...  */
7753 machine_mode cmode = get_pool_mode (op);
7754
7755 dalign = GET_MODE_ALIGNMENT (cmode);
7756 }
7757 }
7758 else if (DECL_P (decl))
7759 {
7760 dalign = DECL_ALIGN (decl);
7761
7762 if (dsize == 0)
7763 {
7764 /* Allow BLKmode when the entire object is known to not
7765 cross a 32k boundary. */
7766 if (!DECL_SIZE_UNIT (decl))
7767 return false;
7768
7769 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7770 return false;
7771
7772 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7773 if (dsize > 32768)
7774 return false;
7775
7776 dalign /= BITS_PER_UNIT;
7777 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7778 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7779 return dalign >= dsize;
7780 }
7781 }
7782 else
7783 gcc_unreachable ();
7784
7785 /* Find how many bits of the alignment we know for this access. */
7786 dalign /= BITS_PER_UNIT;
7787 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7788 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7789 mask = dalign - 1;
7790 lsb = offset & -offset;
7791 mask &= lsb - 1;
7792 dalign = mask + 1;
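  /* Illustrative example: with dalign = 8 (the TOC pointer alignment cap)
     and offset = 20, lsb = 4, so mask = 7 & 3 = 3 and dalign becomes 4;
     only a 4-byte or smaller access is then known not to cross a 32k
     boundary.  */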
7793
7794 return dalign >= dsize;
7795 }
7796
7797 static bool
7798 constant_pool_expr_p (rtx op)
7799 {
7800 rtx base, offset;
7801
7802 split_const (op, &base, &offset);
7803 return (SYMBOL_REF_P (base)
7804 && CONSTANT_POOL_ADDRESS_P (base)
7805 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7806 }
7807
7808 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
7809 use that as the register to put the HIGH value into if register allocation
7810 is already done. */
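/* The returned RTL has these shapes (illustrative): for -mcmodel=small,
   (unspec [(symbol_ref) (reg 2)] UNSPEC_TOCREL); for medium/large code
   after register allocation, (lo_sum (high (unspec ...)) (unspec ...)),
   with the HIGH part first moved into LARGETOC_REG when it is given.  */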
7811
7812 rtx
7813 create_TOC_reference (rtx symbol, rtx largetoc_reg)
7814 {
7815 rtx tocrel, tocreg, hi;
7816
7817 gcc_assert (TARGET_TOC);
7818
7819 if (TARGET_DEBUG_ADDR)
7820 {
7821 if (SYMBOL_REF_P (symbol))
7822 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
7823 XSTR (symbol, 0));
7824 else
7825 {
7826 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
7827 GET_RTX_NAME (GET_CODE (symbol)));
7828 debug_rtx (symbol);
7829 }
7830 }
7831
7832 if (!can_create_pseudo_p ())
7833 df_set_regs_ever_live (TOC_REGISTER, true);
7834
7835 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
7836 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
7837 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
7838 return tocrel;
7839
7840 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
7841 if (largetoc_reg != NULL)
7842 {
7843 emit_move_insn (largetoc_reg, hi);
7844 hi = largetoc_reg;
7845 }
7846 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
7847 }
7848
7849 /* These are only used to pass through from print_operand/print_operand_address
7850 to rs6000_output_addr_const_extra over the intervening function
7851 output_addr_const which is not target code. */
7852 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7853
7854 /* Return true if OP is a toc pointer relative address (the output
7855 of create_TOC_reference). If STRICT, do not match non-split
7856 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7857 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7858 TOCREL_OFFSET_RET respectively. */
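/* The accepted shapes (illustrative) are
     (unspec [(symbol_ref) (reg 2)] UNSPEC_TOCREL)
     (plus (unspec ...) (const_int N))
   optionally wrapped in a LO_SUM for -mcmodel=medium/large.  */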
7859
7860 bool
7861 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7862 const_rtx *tocrel_offset_ret)
7863 {
7864 if (!TARGET_TOC)
7865 return false;
7866
7867 if (TARGET_CMODEL != CMODEL_SMALL)
7868 {
7869       /* When STRICT, ensure we have everything tidy.  */
7870 if (strict
7871 && !(GET_CODE (op) == LO_SUM
7872 && REG_P (XEXP (op, 0))
7873 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7874 return false;
7875
7876 /* When not strict, allow non-split TOC addresses and also allow
7877 (lo_sum (high ..)) TOC addresses created during reload. */
7878 if (GET_CODE (op) == LO_SUM)
7879 op = XEXP (op, 1);
7880 }
7881
7882 const_rtx tocrel_base = op;
7883 const_rtx tocrel_offset = const0_rtx;
7884
7885 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7886 {
7887 tocrel_base = XEXP (op, 0);
7888 tocrel_offset = XEXP (op, 1);
7889 }
7890
7891 if (tocrel_base_ret)
7892 *tocrel_base_ret = tocrel_base;
7893 if (tocrel_offset_ret)
7894 *tocrel_offset_ret = tocrel_offset;
7895
7896 return (GET_CODE (tocrel_base) == UNSPEC
7897 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7898 && REG_P (XVECEXP (tocrel_base, 0, 1))
7899 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7900 }
7901
7902 /* Return true if X is a constant pool address, and also for cmodel=medium
7903 if X is a toc-relative address known to be offsettable within MODE. */
7904
7905 bool
7906 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7907 bool strict)
7908 {
7909 const_rtx tocrel_base, tocrel_offset;
7910 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7911 && (TARGET_CMODEL != CMODEL_MEDIUM
7912 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7913 || mode == QImode
7914 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7915 INTVAL (tocrel_offset), mode)));
7916 }
7917
7918 static bool
7919 legitimate_small_data_p (machine_mode mode, rtx x)
7920 {
7921 return (DEFAULT_ABI == ABI_V4
7922 && !flag_pic && !TARGET_TOC
7923 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7924 && small_data_operand (x, mode));
7925 }
7926
7927 bool
7928 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7929 bool strict, bool worst_case)
7930 {
7931 unsigned HOST_WIDE_INT offset;
7932 unsigned int extra;
7933
7934 if (GET_CODE (x) != PLUS)
7935 return false;
7936 if (!REG_P (XEXP (x, 0)))
7937 return false;
7938 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7939 return false;
7940 if (mode_supports_dq_form (mode))
7941 return quad_address_p (x, mode, strict);
7942 if (!reg_offset_addressing_ok_p (mode))
7943 return virtual_stack_registers_memory_p (x);
7944 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7945 return true;
7946 if (!CONST_INT_P (XEXP (x, 1)))
7947 return false;
7948
7949 offset = INTVAL (XEXP (x, 1));
7950 extra = 0;
7951 switch (mode)
7952 {
7953 case E_DFmode:
7954 case E_DDmode:
7955 case E_DImode:
7956 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7957 addressing. */
7958 if (VECTOR_MEM_VSX_P (mode))
7959 return false;
7960
7961 if (!worst_case)
7962 break;
7963 if (!TARGET_POWERPC64)
7964 extra = 4;
7965 else if (offset & 3)
7966 return false;
7967 break;
7968
7969 case E_TFmode:
7970 case E_IFmode:
7971 case E_KFmode:
7972 case E_TDmode:
7973 case E_TImode:
7974 case E_PTImode:
7975 extra = 8;
7976 if (!worst_case)
7977 break;
7978 if (!TARGET_POWERPC64)
7979 extra = 12;
7980 else if (offset & 3)
7981 return false;
7982 break;
7983
7984 default:
7985 break;
7986 }
7987
7988 if (TARGET_PREFIXED)
7989 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
7990 else
7991 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7992 }
7993
7994 bool
7995 legitimate_indexed_address_p (rtx x, int strict)
7996 {
7997 rtx op0, op1;
7998
7999 if (GET_CODE (x) != PLUS)
8000 return false;
8001
8002 op0 = XEXP (x, 0);
8003 op1 = XEXP (x, 1);
8004
8005 return (REG_P (op0) && REG_P (op1)
8006 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8007 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8008 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8009 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8010 }
8011
8012 bool
8013 avoiding_indexed_address_p (machine_mode mode)
8014 {
8015 /* Avoid indexed addressing for modes that have non-indexed
8016 load/store instruction forms. */
8017 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8018 }
8019
8020 bool
8021 legitimate_indirect_address_p (rtx x, int strict)
8022 {
8023 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
8024 }
8025
8026 bool
8027 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8028 {
8029 if (!TARGET_MACHO || !flag_pic
8030 || mode != SImode || !MEM_P (x))
8031 return false;
8032 x = XEXP (x, 0);
8033
8034 if (GET_CODE (x) != LO_SUM)
8035 return false;
8036 if (!REG_P (XEXP (x, 0)))
8037 return false;
8038 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8039 return false;
8040 x = XEXP (x, 1);
8041
8042 return CONSTANT_P (x);
8043 }
8044
8045 static bool
8046 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8047 {
8048 if (GET_CODE (x) != LO_SUM)
8049 return false;
8050 if (!REG_P (XEXP (x, 0)))
8051 return false;
8052 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8053 return false;
8054   /* Quad word addresses are restricted, and we can't use LO_SUM.  */
8055 if (mode_supports_dq_form (mode))
8056 return false;
8057 x = XEXP (x, 1);
8058
8059 if (TARGET_ELF || TARGET_MACHO)
8060 {
8061 bool large_toc_ok;
8062
8063 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8064 return false;
8065       /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, which is usually called
8066	  via push_reload from the old reload pass.  LEGITIMIZE_RELOAD_ADDRESS
8067	  recognizes some LO_SUM addresses as valid although this function
8068	  says the opposite.  In most cases LRA can generate correct code for
8069	  address reloads through its own transformations, but it cannot
8070	  manage some LO_SUM cases, so we need code here saying that those
8071	  addresses are still valid.  */
8072 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8073 && small_toc_ref (x, VOIDmode));
8074 if (TARGET_TOC && ! large_toc_ok)
8075 return false;
8076 if (GET_MODE_NUNITS (mode) != 1)
8077 return false;
8078 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8079 && !(/* ??? Assume floating point reg based on mode? */
8080 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8081 return false;
8082
8083 return CONSTANT_P (x) || large_toc_ok;
8084 }
8085
8086 return false;
8087 }
8088
8089
8090 /* Try machine-dependent ways of modifying an illegitimate address
8091 to be legitimate. If we find one, return the new, valid address.
8092 This is used from only one place: `memory_address' in explow.c.
8093
8094 OLDX is the address as it was before break_out_memory_refs was
8095 called. In some cases it is useful to look at this to decide what
8096 needs to be done.
8097
8098 It is always safe for this function to do nothing. It exists to
8099 recognize opportunities to optimize the output.
8100
8101 On RS/6000, first check for the sum of a register with a constant
8102 integer that is out of range. If so, generate code to add the
8103 constant with the low-order 16 bits masked to the register and force
8104 this result into another register (this can be done with `cau').
8105 Then generate an address of REG+(CONST&0xffff), allowing for the
8106 possibility of bit 16 being a one.
8107
8108 Then check for the sum of a register and something not constant, try to
8109 load the other things into a register and return the sum. */
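/* A worked example of the high/low split (illustrative): legitimizing
   (plus (reg r3) (const_int 0x12345)) emits an add of the high part
   0x10000 into a new register and returns (plus (reg tmp)
   (const_int 0x2345)), since 0x2345 is the sign-extended value of the low
   16 bits of the original constant.  */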
8110
8111 static rtx
8112 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8113 machine_mode mode)
8114 {
8115 unsigned int extra;
8116
8117 if (!reg_offset_addressing_ok_p (mode)
8118 || mode_supports_dq_form (mode))
8119 {
8120 if (virtual_stack_registers_memory_p (x))
8121 return x;
8122
8123 /* In theory we should not be seeing addresses of the form reg+0,
8124 but just in case it is generated, optimize it away. */
8125 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8126 return force_reg (Pmode, XEXP (x, 0));
8127
8128 /* For TImode with load/store quad, restrict addresses to just a single
8129 pointer, so it works with both GPRs and VSX registers. */
8130 /* Make sure both operands are registers. */
8131 else if (GET_CODE (x) == PLUS
8132 && (mode != TImode || !TARGET_VSX))
8133 return gen_rtx_PLUS (Pmode,
8134 force_reg (Pmode, XEXP (x, 0)),
8135 force_reg (Pmode, XEXP (x, 1)));
8136 else
8137 return force_reg (Pmode, x);
8138 }
8139 if (SYMBOL_REF_P (x))
8140 {
8141 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8142 if (model != 0)
8143 return rs6000_legitimize_tls_address (x, model);
8144 }
8145
8146 extra = 0;
8147 switch (mode)
8148 {
8149 case E_TFmode:
8150 case E_TDmode:
8151 case E_TImode:
8152 case E_PTImode:
8153 case E_IFmode:
8154 case E_KFmode:
8155 /* As in legitimate_offset_address_p we do not assume
8156 worst-case. The mode here is just a hint as to the registers
8157 used. A TImode is usually in gprs, but may actually be in
8158 fprs. Leave worst-case scenario for reload to handle via
8159 insn constraints. PTImode is only GPRs. */
8160 extra = 8;
8161 break;
8162 default:
8163 break;
8164 }
8165
8166 if (GET_CODE (x) == PLUS
8167 && REG_P (XEXP (x, 0))
8168 && CONST_INT_P (XEXP (x, 1))
8169 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8170 >= 0x10000 - extra))
8171 {
8172 HOST_WIDE_INT high_int, low_int;
8173 rtx sum;
8174 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8175 if (low_int >= 0x8000 - extra)
8176 low_int = 0;
8177 high_int = INTVAL (XEXP (x, 1)) - low_int;
8178 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8179 GEN_INT (high_int)), 0);
8180 return plus_constant (Pmode, sum, low_int);
8181 }
8182 else if (GET_CODE (x) == PLUS
8183 && REG_P (XEXP (x, 0))
8184 && !CONST_INT_P (XEXP (x, 1))
8185 && GET_MODE_NUNITS (mode) == 1
8186 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8187 || (/* ??? Assume floating point reg based on mode? */
8188 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8189 && !avoiding_indexed_address_p (mode))
8190 {
8191 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8192 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8193 }
8194 else if ((TARGET_ELF
8195 #if TARGET_MACHO
8196 || !MACHO_DYNAMIC_NO_PIC_P
8197 #endif
8198 )
8199 && TARGET_32BIT
8200 && TARGET_NO_TOC_OR_PCREL
8201 && !flag_pic
8202 && !CONST_INT_P (x)
8203 && !CONST_WIDE_INT_P (x)
8204 && !CONST_DOUBLE_P (x)
8205 && CONSTANT_P (x)
8206 && GET_MODE_NUNITS (mode) == 1
8207 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8208 || (/* ??? Assume floating point reg based on mode? */
8209 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8210 {
8211 rtx reg = gen_reg_rtx (Pmode);
8212 if (TARGET_ELF)
8213 emit_insn (gen_elf_high (reg, x));
8214 else
8215 emit_insn (gen_macho_high (Pmode, reg, x));
8216 return gen_rtx_LO_SUM (Pmode, reg, x);
8217 }
8218 else if (TARGET_TOC
8219 && SYMBOL_REF_P (x)
8220 && constant_pool_expr_p (x)
8221 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8222 return create_TOC_reference (x, NULL_RTX);
8223 else
8224 return x;
8225 }
8226
8227 /* Debug version of rs6000_legitimize_address. */
8228 static rtx
8229 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8230 {
8231 rtx ret;
8232 rtx_insn *insns;
8233
8234 start_sequence ();
8235 ret = rs6000_legitimize_address (x, oldx, mode);
8236 insns = get_insns ();
8237 end_sequence ();
8238
8239 if (ret != x)
8240 {
8241 fprintf (stderr,
8242 "\nrs6000_legitimize_address: mode %s, old code %s, "
8243 "new code %s, modified\n",
8244 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8245 GET_RTX_NAME (GET_CODE (ret)));
8246
8247 fprintf (stderr, "Original address:\n");
8248 debug_rtx (x);
8249
8250 fprintf (stderr, "oldx:\n");
8251 debug_rtx (oldx);
8252
8253 fprintf (stderr, "New address:\n");
8254 debug_rtx (ret);
8255
8256 if (insns)
8257 {
8258 fprintf (stderr, "Insns added:\n");
8259 debug_rtx_list (insns, 20);
8260 }
8261 }
8262 else
8263 {
8264 fprintf (stderr,
8265 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8266 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8267
8268 debug_rtx (x);
8269 }
8270
8271 if (insns)
8272 emit_insn (insns);
8273
8274 return ret;
8275 }
8276
8277 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8278 We need to emit DTP-relative relocations. */
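/* For ELF with SIZE == 4 the emitted directive looks like this
   (illustrative symbol name):

	.long	foo@dtprel+0x8000  */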
8279
8280 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8281 static void
8282 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8283 {
8284 switch (size)
8285 {
8286 case 4:
8287 fputs ("\t.long\t", file);
8288 break;
8289 case 8:
8290 fputs (DOUBLE_INT_ASM_OP, file);
8291 break;
8292 default:
8293 gcc_unreachable ();
8294 }
8295 output_addr_const (file, x);
8296 if (TARGET_ELF)
8297 fputs ("@dtprel+0x8000", file);
8298 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8299 {
8300 switch (SYMBOL_REF_TLS_MODEL (x))
8301 {
8302 case 0:
8303 break;
8304 case TLS_MODEL_LOCAL_EXEC:
8305 fputs ("@le", file);
8306 break;
8307 case TLS_MODEL_INITIAL_EXEC:
8308 fputs ("@ie", file);
8309 break;
8310 case TLS_MODEL_GLOBAL_DYNAMIC:
8311 case TLS_MODEL_LOCAL_DYNAMIC:
8312 fputs ("@m", file);
8313 break;
8314 default:
8315 gcc_unreachable ();
8316 }
8317 }
8318 }
8319
8320 /* Return true if X is a symbol that refers to real (rather than emulated)
8321 TLS. */
8322
8323 static bool
8324 rs6000_real_tls_symbol_ref_p (rtx x)
8325 {
8326 return (SYMBOL_REF_P (x)
8327 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8328 }
8329
8330 /* In the name of slightly smaller debug output, and to cater to
8331 general assembler lossage, recognize various UNSPEC sequences
8332 and turn them back into a direct symbol reference. */
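/* For example (illustrative), a TOC reference such as
   (mem (plus (unspec [(symbol_ref "x") (reg 2)] UNSPEC_TOCREL)
	      (const_int 8)))
   is turned back into (mem (plus (symbol_ref "x") (const_int 8))) for
   debug output.  */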
8333
8334 static rtx
8335 rs6000_delegitimize_address (rtx orig_x)
8336 {
8337 rtx x, y, offset;
8338
8339 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8340 orig_x = XVECEXP (orig_x, 0, 0);
8341
8342 orig_x = delegitimize_mem_from_attrs (orig_x);
8343
8344 x = orig_x;
8345 if (MEM_P (x))
8346 x = XEXP (x, 0);
8347
8348 y = x;
8349 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8350 y = XEXP (y, 1);
8351
8352 offset = NULL_RTX;
8353 if (GET_CODE (y) == PLUS
8354 && GET_MODE (y) == Pmode
8355 && CONST_INT_P (XEXP (y, 1)))
8356 {
8357 offset = XEXP (y, 1);
8358 y = XEXP (y, 0);
8359 }
8360
8361 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8362 {
8363 y = XVECEXP (y, 0, 0);
8364
8365 #ifdef HAVE_AS_TLS
8366 /* Do not associate thread-local symbols with the original
8367 constant pool symbol. */
8368 if (TARGET_XCOFF
8369 && SYMBOL_REF_P (y)
8370 && CONSTANT_POOL_ADDRESS_P (y)
8371 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8372 return orig_x;
8373 #endif
8374
8375 if (offset != NULL_RTX)
8376 y = gen_rtx_PLUS (Pmode, y, offset);
8377 if (!MEM_P (orig_x))
8378 return y;
8379 else
8380 return replace_equiv_address_nv (orig_x, y);
8381 }
8382
8383 if (TARGET_MACHO
8384 && GET_CODE (orig_x) == LO_SUM
8385 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8386 {
8387 y = XEXP (XEXP (orig_x, 1), 0);
8388 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8389 return XVECEXP (y, 0, 0);
8390 }
8391
8392 return orig_x;
8393 }
8394
8395 /* Return true if X shouldn't be emitted into the debug info.
8396 The linker doesn't like .toc section references from
8397 .debug_* sections, so reject .toc section symbols. */
8398
8399 static bool
8400 rs6000_const_not_ok_for_debug_p (rtx x)
8401 {
8402 if (GET_CODE (x) == UNSPEC)
8403 return true;
8404 if (SYMBOL_REF_P (x)
8405 && CONSTANT_POOL_ADDRESS_P (x))
8406 {
8407 rtx c = get_pool_constant (x);
8408 machine_mode cmode = get_pool_mode (x);
8409 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8410 return true;
8411 }
8412
8413 return false;
8414 }
8415
8416 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8417
8418 static bool
8419 rs6000_legitimate_combined_insn (rtx_insn *insn)
8420 {
8421 int icode = INSN_CODE (insn);
8422
8423 /* Reject creating doloop insns. Combine should not be allowed
8424 to create these for a number of reasons:
8425 1) In a nested loop, if combine creates one of these in an
8426 outer loop and the register allocator happens to allocate ctr
8427 to the outer loop insn, then the inner loop can't use ctr.
8428 Inner loops ought to be more highly optimized.
8429 2) Combine often wants to create one of these from what was
8430 originally a three insn sequence, first combining the three
8431 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8432      allocated ctr, the splitter takes us back to the three insn
8433 sequence. It's better to stop combine at the two insn
8434 sequence.
8435      3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8436 insns, the register allocator sometimes uses floating point
8437 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8438 jump insn and output reloads are not implemented for jumps,
8439 the ctrsi/ctrdi splitters need to handle all possible cases.
8440 That's a pain, and it gets to be seriously difficult when a
8441 splitter that runs after reload needs memory to transfer from
8442 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8443 for the difficult case. It's better to not create problems
8444 in the first place. */
8445 if (icode != CODE_FOR_nothing
8446 && (icode == CODE_FOR_bdz_si
8447 || icode == CODE_FOR_bdz_di
8448 || icode == CODE_FOR_bdnz_si
8449 || icode == CODE_FOR_bdnz_di
8450 || icode == CODE_FOR_bdztf_si
8451 || icode == CODE_FOR_bdztf_di
8452 || icode == CODE_FOR_bdnztf_si
8453 || icode == CODE_FOR_bdnztf_di))
8454 return false;
8455
8456 return true;
8457 }
8458
8459 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8460
8461 static GTY(()) rtx rs6000_tls_symbol;
8462 static rtx
8463 rs6000_tls_get_addr (void)
8464 {
8465 if (!rs6000_tls_symbol)
8466 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8467
8468 return rs6000_tls_symbol;
8469 }
8470
8471 /* Construct the SYMBOL_REF for TLS GOT references. */
8472
8473 static GTY(()) rtx rs6000_got_symbol;
8474 rtx
8475 rs6000_got_sym (void)
8476 {
8477 if (!rs6000_got_symbol)
8478 {
8479 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8480 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8481 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8482 }
8483
8484 return rs6000_got_symbol;
8485 }
8486
8487 /* AIX Thread-Local Address support. */
8488
8489 static rtx
8490 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8491 {
8492 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8493 const char *name;
8494 char *tlsname;
8495
8496 name = XSTR (addr, 0);
8497   /* Append the TLS CSECT qualifier, unless the symbol is already qualified
8498      or the symbol will be placed in the TLS private data section.  */
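  /* For example (hypothetical symbol), "foo" becomes "foo[TL]", or
     "foo[UL]" when the variable is a BSS initializer.  */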
8499 if (name[strlen (name) - 1] != ']'
8500 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8501 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8502 {
8503 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8504 strcpy (tlsname, name);
8505 strcat (tlsname,
8506 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8507 tlsaddr = copy_rtx (addr);
8508 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8509 }
8510 else
8511 tlsaddr = addr;
8512
8513 /* Place addr into TOC constant pool. */
8514 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8515
8516 /* Output the TOC entry and create the MEM referencing the value. */
8517 if (constant_pool_expr_p (XEXP (sym, 0))
8518 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8519 {
8520 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8521 mem = gen_const_mem (Pmode, tocref);
8522 set_mem_alias_set (mem, get_TOC_alias_set ());
8523 }
8524 else
8525 return sym;
8526
8527 /* Use global-dynamic for local-dynamic. */
8528 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8529 || model == TLS_MODEL_LOCAL_DYNAMIC)
8530 {
8531 /* Create new TOC reference for @m symbol. */
8532 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8533 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8534 strcpy (tlsname, "*LCM");
8535 strcat (tlsname, name + 3);
8536 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8537 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8538 tocref = create_TOC_reference (modaddr, NULL_RTX);
8539 rtx modmem = gen_const_mem (Pmode, tocref);
8540 set_mem_alias_set (modmem, get_TOC_alias_set ());
8541
8542 rtx modreg = gen_reg_rtx (Pmode);
8543 emit_insn (gen_rtx_SET (modreg, modmem));
8544
8545 tmpreg = gen_reg_rtx (Pmode);
8546 emit_insn (gen_rtx_SET (tmpreg, mem));
8547
8548 dest = gen_reg_rtx (Pmode);
8549 if (TARGET_32BIT)
8550 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8551 else
8552 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8553 return dest;
8554 }
8555 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8556 else if (TARGET_32BIT)
8557 {
8558 tlsreg = gen_reg_rtx (SImode);
8559 emit_insn (gen_tls_get_tpointer (tlsreg));
8560 }
8561 else
8562 tlsreg = gen_rtx_REG (DImode, 13);
8563
8564 /* Load the TOC value into temporary register. */
8565 tmpreg = gen_reg_rtx (Pmode);
8566 emit_insn (gen_rtx_SET (tmpreg, mem));
8567 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8568 gen_rtx_MINUS (Pmode, addr, tlsreg));
8569
8570 /* Add TOC symbol value to TLS pointer. */
8571 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8572
8573 return dest;
8574 }
8575
8576 /* Passes the TLS argument value from the global-dynamic and local-dynamic
8577    emit_library_call_value calls in rs6000_legitimize_tls_address through
8578    to rs6000_call_aix and rs6000_call_sysv.  This is used to emit the
8579    marker relocs put on __tls_get_addr calls.  */
8580 static rtx global_tlsarg;
8581
8582 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8583 this (thread-local) address. */
8584
8585 static rtx
8586 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8587 {
8588 rtx dest, insn;
8589
8590 if (TARGET_XCOFF)
8591 return rs6000_legitimize_tls_address_aix (addr, model);
8592
8593 dest = gen_reg_rtx (Pmode);
8594 if (model == TLS_MODEL_LOCAL_EXEC
8595 && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)))
8596 {
8597 rtx tlsreg;
8598
8599 if (TARGET_64BIT)
8600 {
8601 tlsreg = gen_rtx_REG (Pmode, 13);
8602 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8603 }
8604 else
8605 {
8606 tlsreg = gen_rtx_REG (Pmode, 2);
8607 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8608 }
8609 emit_insn (insn);
8610 }
8611 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8612 {
8613 rtx tlsreg, tmp;
8614
8615 tmp = gen_reg_rtx (Pmode);
8616 if (TARGET_64BIT)
8617 {
8618 tlsreg = gen_rtx_REG (Pmode, 13);
8619 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8620 }
8621 else
8622 {
8623 tlsreg = gen_rtx_REG (Pmode, 2);
8624 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8625 }
8626 emit_insn (insn);
8627 if (TARGET_64BIT)
8628 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8629 else
8630 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8631 emit_insn (insn);
8632 }
8633 else
8634 {
8635 rtx got, tga, tmp1, tmp2;
8636
8637 /* We currently use relocations like @got@tlsgd for tls, which
8638 means the linker will handle allocation of tls entries, placing
8639 them in the .got section. So use a pointer to the .got section,
8640 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8641 or to secondary GOT sections used by 32-bit -fPIC. */
8642 if (rs6000_pcrel_p (cfun))
8643 got = const0_rtx;
8644 else if (TARGET_64BIT)
8645 got = gen_rtx_REG (Pmode, 2);
8646 else
8647 {
8648 if (flag_pic == 1)
8649 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8650 else
8651 {
8652 rtx gsym = rs6000_got_sym ();
8653 got = gen_reg_rtx (Pmode);
8654 if (flag_pic == 0)
8655 rs6000_emit_move (got, gsym, Pmode);
8656 else
8657 {
8658 rtx mem, lab;
8659
8660 tmp1 = gen_reg_rtx (Pmode);
8661 tmp2 = gen_reg_rtx (Pmode);
8662 mem = gen_const_mem (Pmode, tmp1);
8663 lab = gen_label_rtx ();
8664 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8665 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8666 if (TARGET_LINK_STACK)
8667 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8668 emit_move_insn (tmp2, mem);
8669 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8670 set_unique_reg_note (last, REG_EQUAL, gsym);
8671 }
8672 }
8673 }
8674
8675 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8676 {
8677 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8678 UNSPEC_TLSGD);
8679 tga = rs6000_tls_get_addr ();
8680 rtx argreg = gen_rtx_REG (Pmode, 3);
8681 emit_insn (gen_rtx_SET (argreg, arg));
8682 global_tlsarg = arg;
8683 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
8684 global_tlsarg = NULL_RTX;
8685
8686 /* Make a note so that the result of this call can be CSEd. */
8687 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8688 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8689 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8690 }
8691 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8692 {
8693 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8694 tga = rs6000_tls_get_addr ();
8695 tmp1 = gen_reg_rtx (Pmode);
8696 rtx argreg = gen_rtx_REG (Pmode, 3);
8697 emit_insn (gen_rtx_SET (argreg, arg));
8698 global_tlsarg = arg;
8699 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
8700 global_tlsarg = NULL_RTX;
8701
8702 /* Make a note so that the result of this call can be CSEd. */
8703 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8704 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8705 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8706
8707 if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))
8708 {
8709 if (TARGET_64BIT)
8710 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8711 else
8712 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8713 }
8714 else if (rs6000_tls_size == 32)
8715 {
8716 tmp2 = gen_reg_rtx (Pmode);
8717 if (TARGET_64BIT)
8718 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8719 else
8720 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8721 emit_insn (insn);
8722 if (TARGET_64BIT)
8723 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8724 else
8725 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8726 }
8727 else
8728 {
8729 tmp2 = gen_reg_rtx (Pmode);
8730 if (TARGET_64BIT)
8731 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8732 else
8733 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8734 emit_insn (insn);
8735 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8736 }
8737 emit_insn (insn);
8738 }
8739 else
8740 {
8741 /* IE, or 64-bit offset LE. */
8742 tmp2 = gen_reg_rtx (Pmode);
8743 if (TARGET_64BIT)
8744 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8745 else
8746 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8747 emit_insn (insn);
8748 if (rs6000_pcrel_p (cfun))
8749 {
8750 if (TARGET_64BIT)
8751 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
8752 else
8753 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
8754 }
8755 else if (TARGET_64BIT)
8756 insn = gen_tls_tls_64 (dest, tmp2, addr);
8757 else
8758 insn = gen_tls_tls_32 (dest, tmp2, addr);
8759 emit_insn (insn);
8760 }
8761 }
8762
8763 return dest;
8764 }
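/* Illustrative only: at the assembly level the RTL built above
   corresponds to the standard ELF TLS sequences.  On 64-bit ELF a
   global-dynamic access of symbol X becomes

	addis 3,2,x@got@tlsgd@ha
	addi 3,3,x@got@tlsgd@l
	bl __tls_get_addr(x@tlsgd)
	nop

   while a 16-bit local-exec access is the single instruction

	addi 9,13,x@tprel

   with r13 holding the thread pointer.  Register numbers here are
   only examples, and the PC-relative forms differ.  */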
8765
8766 /* Only create the global variable for the stack protect guard if we are using
8767 the global flavor of that guard. */
8768 static tree
8769 rs6000_init_stack_protect_guard (void)
8770 {
8771 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8772 return default_stack_protect_guard ();
8773
8774 return NULL_TREE;
8775 }
8776
8777 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8778
8779 static bool
8780 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8781 {
8782 if (GET_CODE (x) == HIGH
8783 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8784 return true;
8785
8786 /* A TLS symbol in the TOC cannot contain a sum. */
8787 if (GET_CODE (x) == CONST
8788 && GET_CODE (XEXP (x, 0)) == PLUS
8789 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8790 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8791 return true;
8792
8793 /* Do not place an ELF TLS symbol in the constant pool. */
8794 return TARGET_ELF && tls_referenced_p (x);
8795 }
8796
8797 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8798 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8799 can be addressed relative to the toc pointer. */
8800
8801 static bool
8802 use_toc_relative_ref (rtx sym, machine_mode mode)
8803 {
8804 return ((constant_pool_expr_p (sym)
8805 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8806 get_pool_mode (sym)))
8807 || (TARGET_CMODEL == CMODEL_MEDIUM
8808 && SYMBOL_REF_LOCAL_P (sym)
8809 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8810 }
8811
8812 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8813 that is a valid memory address for an instruction.
8814 The MODE argument is the machine mode for the MEM expression
8815 that wants to use this address.
8816
8817 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
8818 refers to a constant pool entry of an address (or the sum of it
8819 plus a constant), a short (16-bit signed) constant plus a register,
8820 the sum of two registers, or a register indirect, possibly with an
8821 auto-increment. For DFmode, DDmode and DImode with a constant plus
8822 register, we must ensure that both words are addressable, or on
8823 PowerPC64 that the offset is word aligned.
8824
8825 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8826 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8827 because adjacent memory cells are accessed by adding word-sized offsets
8828 during assembly output. */
8829 static bool
8830 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8831 {
8832 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8833 bool quad_offset_p = mode_supports_dq_form (mode);
8834
8835 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8836 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
8837 && GET_CODE (x) == AND
8838 && CONST_INT_P (XEXP (x, 1))
8839 && INTVAL (XEXP (x, 1)) == -16)
8840 x = XEXP (x, 0);
8841
8842 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8843 return 0;
8844 if (legitimate_indirect_address_p (x, reg_ok_strict))
8845 return 1;
8846 if (TARGET_UPDATE
8847 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8848 && mode_supports_pre_incdec_p (mode)
8849 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8850 return 1;
8851
8852 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
8853 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
8854 return 1;
8855
8856 /* Handle restricted vector d-form offsets in ISA 3.0. */
8857 if (quad_offset_p)
8858 {
8859 if (quad_address_p (x, mode, reg_ok_strict))
8860 return 1;
8861 }
8862 else if (virtual_stack_registers_memory_p (x))
8863 return 1;
8864
8865 else if (reg_offset_p)
8866 {
8867 if (legitimate_small_data_p (mode, x))
8868 return 1;
8869 if (legitimate_constant_pool_address_p (x, mode,
8870 reg_ok_strict || lra_in_progress))
8871 return 1;
8872 }
8873
8874 /* For TImode, if we have TImode in VSX registers, only allow register
8875 indirect addresses. This will allow the values to go in either GPRs
8876 or VSX registers without reloading. The vector types would tend to
8877 go into VSX registers, so we allow REG+REG, while TImode seems
8878 somewhat split, in that some uses are GPR based, and some VSX based. */
8879 /* FIXME: We could loosen this by changing the following to
8880 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8881 but currently we cannot allow REG+REG addressing for TImode. See
8882 PR72827 for complete details on how this ends up hoodwinking DSE. */
8883 if (mode == TImode && TARGET_VSX)
8884 return 0;
8885 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8886 if (! reg_ok_strict
8887 && reg_offset_p
8888 && GET_CODE (x) == PLUS
8889 && REG_P (XEXP (x, 0))
8890 && (XEXP (x, 0) == virtual_stack_vars_rtx
8891 || XEXP (x, 0) == arg_pointer_rtx)
8892 && CONST_INT_P (XEXP (x, 1)))
8893 return 1;
8894 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8895 return 1;
8896 if (!FLOAT128_2REG_P (mode)
8897 && (TARGET_HARD_FLOAT
8898 || TARGET_POWERPC64
8899 || (mode != DFmode && mode != DDmode))
8900 && (TARGET_POWERPC64 || mode != DImode)
8901 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8902 && mode != PTImode
8903 && !avoiding_indexed_address_p (mode)
8904 && legitimate_indexed_address_p (x, reg_ok_strict))
8905 return 1;
8906 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8907 && mode_supports_pre_modify_p (mode)
8908 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8909 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8910 reg_ok_strict, false)
8911 || (!avoiding_indexed_address_p (mode)
8912 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8913 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8914 {
8915 /* There is no prefixed version of the load/store with update. */
8916 rtx addr = XEXP (x, 1);
8917 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
8918 }
8919 if (reg_offset_p && !quad_offset_p
8920 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8921 return 1;
8922 return 0;
8923 }
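/* For illustration, with REG_OK_STRICT clear and MODE == SImode the
   predicate above accepts forms such as (a sketch, not an exhaustive
   list):

	(reg)				register indirect
	(plus (reg) (const_int 16))	d-form offset
	(plus (reg) (reg))		x-form indexed
	(pre_inc (reg))			update form, if the mode allows

   while (plus (reg) (const_int 0x7fff8)) needs prefixed (34-bit
   offset) addressing to be accepted.  */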
8924
8925 /* Debug version of rs6000_legitimate_address_p. */
8926 static bool
8927 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8928 bool reg_ok_strict)
8929 {
8930 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8931 fprintf (stderr,
8932 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8933 "strict = %d, reload = %s, code = %s\n",
8934 ret ? "true" : "false",
8935 GET_MODE_NAME (mode),
8936 reg_ok_strict,
8937 (reload_completed ? "after" : "before"),
8938 GET_RTX_NAME (GET_CODE (x)));
8939 debug_rtx (x);
8940
8941 return ret;
8942 }
8943
8944 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8945
8946 static bool
8947 rs6000_mode_dependent_address_p (const_rtx addr,
8948 addr_space_t as ATTRIBUTE_UNUSED)
8949 {
8950 return rs6000_mode_dependent_address_ptr (addr);
8951 }
8952
8953 /* Go to LABEL if ADDR (a legitimate address expression)
8954 has an effect that depends on the machine mode it is used for.
8955
8956 On the RS/6000 this is true of all integral offsets (since AltiVec
8957 and VSX modes don't allow them) and of any pre-increment or decrement.
8958
8959 ??? Except that due to conceptual problems in offsettable_address_p
8960 we can't really report the problems of integral offsets. So leave
8961 this assuming that the adjustable offset must be valid for the
8962 sub-words of a TFmode operand, which is what we had before. */
8963
8964 static bool
8965 rs6000_mode_dependent_address (const_rtx addr)
8966 {
8967 switch (GET_CODE (addr))
8968 {
8969 case PLUS:
8970 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8971 is considered a legitimate address before reload, so there
8972 are no offset restrictions in that case. Note that this
8973 condition is safe in strict mode because any address involving
8974 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8975 been rejected as illegitimate. */
8976 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8977 && XEXP (addr, 0) != arg_pointer_rtx
8978 && CONST_INT_P (XEXP (addr, 1)))
8979 {
8980 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8981 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
8982 if (TARGET_PREFIXED)
8983 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
8984 else
8985 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
8986 }
8987 break;
8988
8989 case LO_SUM:
8990 /* Anything in the constant pool is sufficiently aligned that
8991 all bytes have the same high part address. */
8992 return !legitimate_constant_pool_address_p (addr, QImode, false);
8993
8994 /* Auto-increment cases are now treated generically in recog.c. */
8995 case PRE_MODIFY:
8996 return TARGET_UPDATE;
8997
8998 /* AND is only allowed in Altivec loads. */
8999 case AND:
9000 return true;
9001
9002 default:
9003 break;
9004 }
9005
9006 return false;
9007 }
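/* Worked example, assuming SIGNED_16BIT_OFFSET_EXTRA_P (VAL, EXTRA)
   tests that both VAL and VAL + EXTRA fit in a signed 16-bit
   immediate: with TARGET_POWERPC64 (EXTRA == 8) the address

	(plus (reg) (const_int 32760))

   is mode-dependent, since 32760 + 8 == 32768 no longer fits, while
   (plus (reg) (const_int 32752)) is not.  */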
9008
9009 /* Debug version of rs6000_mode_dependent_address. */
9010 static bool
9011 rs6000_debug_mode_dependent_address (const_rtx addr)
9012 {
9013 bool ret = rs6000_mode_dependent_address (addr);
9014
9015 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9016 ret ? "true" : "false");
9017 debug_rtx (addr);
9018
9019 return ret;
9020 }
9021
9022 /* Implement FIND_BASE_TERM. */
9023
9024 rtx
9025 rs6000_find_base_term (rtx op)
9026 {
9027 rtx base;
9028
9029 base = op;
9030 if (GET_CODE (base) == CONST)
9031 base = XEXP (base, 0);
9032 if (GET_CODE (base) == PLUS)
9033 base = XEXP (base, 0);
9034 if (GET_CODE (base) == UNSPEC)
9035 switch (XINT (base, 1))
9036 {
9037 case UNSPEC_TOCREL:
9038 case UNSPEC_MACHOPIC_OFFSET:
9039 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9040 for aliasing purposes. */
9041 return XVECEXP (base, 0, 0);
9042 }
9043
9044 return op;
9045 }
9046
9047 /* More elaborate version of recog's offsettable_memref_p predicate
9048 that works around the ??? note of rs6000_mode_dependent_address.
9049 In particular it accepts
9050
9051 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9052
9053 in 32-bit mode, which the recog predicate rejects. */
9054
9055 static bool
9056 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9057 {
9058 bool worst_case;
9059
9060 if (!MEM_P (op))
9061 return false;
9062
9063 /* First mimic offsettable_memref_p. */
9064 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9065 return true;
9066
9067 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9068 the latter predicate knows nothing about the mode of the memory
9069 reference and, therefore, assumes that it is the largest supported
9070 mode (TFmode). As a consequence, legitimate offsettable memory
9071 references are rejected. rs6000_legitimate_offset_address_p contains
9072 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9073 at least with a little bit of help here given that we know the
9074 actual registers used. */
9075 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9076 || GET_MODE_SIZE (reg_mode) == 4);
9077 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9078 strict, worst_case);
9079 }
9080
9081 /* Determine the reassociation width to be used in reassociate_bb.
9082 This takes into account how many parallel operations we
9083 can actually do of a given type, and also the latency.
9084 P8:
9085 int add/sub 6/cycle
9086 mul 2/cycle
9087 vect add/sub/mul 2/cycle
9088 fp add/sub/mul 2/cycle
9089 dfp 1/cycle
9090 */
9091
9092 static int
9093 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9094 machine_mode mode)
9095 {
9096 switch (rs6000_tune)
9097 {
9098 case PROCESSOR_POWER8:
9099 case PROCESSOR_POWER9:
9100 case PROCESSOR_FUTURE:
9101 if (DECIMAL_FLOAT_MODE_P (mode))
9102 return 1;
9103 if (VECTOR_MODE_P (mode))
9104 return 4;
9105 if (INTEGRAL_MODE_P (mode))
9106 return 1;
9107 if (FLOAT_MODE_P (mode))
9108 return 4;
9109 break;
9110 default:
9111 break;
9112 }
9113 return 1;
9114 }
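/* As an example of what the width means: a value of 4 lets
   reassociate_bb rewrite a linear chain such as

	((((a + b) + c) + d) + e)

   into the shallower ((a + b) + (c + d)) + e, so independent
   operations can issue in parallel on the FP/vector units.  */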
9115
9116 /* Change register usage conditional on target flags. */
9117 static void
9118 rs6000_conditional_register_usage (void)
9119 {
9120 int i;
9121
9122 if (TARGET_DEBUG_TARGET)
9123 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9124
9125 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9126 if (TARGET_64BIT)
9127 fixed_regs[13] = call_used_regs[13] = 1;
9128
9129 /* Conditionally disable FPRs. */
9130 if (TARGET_SOFT_FLOAT)
9131 for (i = 32; i < 64; i++)
9132 fixed_regs[i] = call_used_regs[i] = 1;
9133
9134 /* The TOC register is not killed across calls in a way that is
9135 visible to the compiler. */
9136 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9137 call_used_regs[2] = 0;
9138
9139 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9140 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9141
9142 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9143 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9144 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9145
9146 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9147 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9148 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9149
9150 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9151 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9152
9153 if (!TARGET_ALTIVEC && !TARGET_VSX)
9154 {
9155 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9156 fixed_regs[i] = call_used_regs[i] = 1;
9157 call_used_regs[VRSAVE_REGNO] = 1;
9158 }
9159
9160 if (TARGET_ALTIVEC || TARGET_VSX)
9161 global_regs[VSCR_REGNO] = 1;
9162
9163 if (TARGET_ALTIVEC_ABI)
9164 {
9165 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9166 call_used_regs[i] = 1;
9167
9168 /* AIX reserves VR20:31 in non-extended ABI mode. */
9169 if (TARGET_XCOFF)
9170 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9171 fixed_regs[i] = call_used_regs[i] = 1;
9172 }
9173 }
9174
9175 \f
9176 /* Output insns to set DEST equal to the constant SOURCE as a series of
9177 lis, ori and shl instructions and return TRUE. */
9178
9179 bool
9180 rs6000_emit_set_const (rtx dest, rtx source)
9181 {
9182 machine_mode mode = GET_MODE (dest);
9183 rtx temp, set;
9184 rtx_insn *insn;
9185 HOST_WIDE_INT c;
9186
9187 gcc_checking_assert (CONST_INT_P (source));
9188 c = INTVAL (source);
9189 switch (mode)
9190 {
9191 case E_QImode:
9192 case E_HImode:
9193 emit_insn (gen_rtx_SET (dest, source));
9194 return true;
9195
9196 case E_SImode:
9197 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9198
9199 emit_insn (gen_rtx_SET (copy_rtx (temp),
9200 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9201 emit_insn (gen_rtx_SET (dest,
9202 gen_rtx_IOR (SImode, copy_rtx (temp),
9203 GEN_INT (c & 0xffff))));
9204 break;
9205
9206 case E_DImode:
9207 if (!TARGET_POWERPC64)
9208 {
9209 rtx hi, lo;
9210
9211 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9212 DImode);
9213 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9214 DImode);
9215 emit_move_insn (hi, GEN_INT (c >> 32));
9216 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9217 emit_move_insn (lo, GEN_INT (c));
9218 }
9219 else
9220 rs6000_emit_set_long_const (dest, c);
9221 break;
9222
9223 default:
9224 gcc_unreachable ();
9225 }
9226
9227 insn = get_last_insn ();
9228 set = single_set (insn);
9229 if (! CONSTANT_P (SET_SRC (set)))
9230 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9231
9232 return true;
9233 }
9234
9235 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9236 Output insns to set DEST equal to the constant C as a series of
9237 lis, ori and shl instructions. */
9238
9239 static void
9240 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9241 {
9242 rtx temp;
9243 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9244
9245 ud1 = c & 0xffff;
9246 c = c >> 16;
9247 ud2 = c & 0xffff;
9248 c = c >> 16;
9249 ud3 = c & 0xffff;
9250 c = c >> 16;
9251 ud4 = c & 0xffff;
9252
9253 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9254 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9255 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9256
9257 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9258 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9259 {
9260 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9261
9262 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9263 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9264 if (ud1 != 0)
9265 emit_move_insn (dest,
9266 gen_rtx_IOR (DImode, copy_rtx (temp),
9267 GEN_INT (ud1)));
9268 }
9269 else if (ud3 == 0 && ud4 == 0)
9270 {
9271 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9272
9273 gcc_assert (ud2 & 0x8000);
9274 emit_move_insn (copy_rtx (temp),
9275 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9276 if (ud1 != 0)
9277 emit_move_insn (copy_rtx (temp),
9278 gen_rtx_IOR (DImode, copy_rtx (temp),
9279 GEN_INT (ud1)));
9280 emit_move_insn (dest,
9281 gen_rtx_ZERO_EXTEND (DImode,
9282 gen_lowpart (SImode,
9283 copy_rtx (temp))));
9284 }
9285 else if (ud1 == ud3 && ud2 == ud4)
9286 {
9287 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9288 HOST_WIDE_INT num = (ud2 << 16) | ud1;
9289 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
9290 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
9291 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
9292 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
9293 }
9294 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9295 || (ud4 == 0 && ! (ud3 & 0x8000)))
9296 {
9297 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9298
9299 emit_move_insn (copy_rtx (temp),
9300 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9301 if (ud2 != 0)
9302 emit_move_insn (copy_rtx (temp),
9303 gen_rtx_IOR (DImode, copy_rtx (temp),
9304 GEN_INT (ud2)));
9305 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9306 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9307 GEN_INT (16)));
9308 if (ud1 != 0)
9309 emit_move_insn (dest,
9310 gen_rtx_IOR (DImode, copy_rtx (temp),
9311 GEN_INT (ud1)));
9312 }
9313 else
9314 {
9315 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9316
9317 emit_move_insn (copy_rtx (temp),
9318 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9319 if (ud3 != 0)
9320 emit_move_insn (copy_rtx (temp),
9321 gen_rtx_IOR (DImode, copy_rtx (temp),
9322 GEN_INT (ud3)));
9323
9324 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9325 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9326 GEN_INT (32)));
9327 if (ud2 != 0)
9328 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9329 gen_rtx_IOR (DImode, copy_rtx (temp),
9330 GEN_INT (ud2 << 16)));
9331 if (ud1 != 0)
9332 emit_move_insn (dest,
9333 gen_rtx_IOR (DImode, copy_rtx (temp),
9334 GEN_INT (ud1)));
9335 }
9336 }
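/* Illustrative worst case (all four 16-bit groups nonzero): for
   C == 0x123456789abcdef0 the sequence built above is equivalent to

	lis   rD,0x1234		# rD = 0x12340000
	ori   rD,rD,0x5678	# rD = 0x12345678
	sldi  rD,rD,32		# rD = 0x1234567800000000
	oris  rD,rD,0x9abc	# OR in ud2 << 16
	ori   rD,rD,0xdef0	# OR in ud1

   five instructions in total.  */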
9337
9338 /* Helper for the following. Get rid of [r+r] memory refs
9339 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9340
9341 static void
9342 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9343 {
9344 if (MEM_P (operands[0])
9345 && !REG_P (XEXP (operands[0], 0))
9346 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9347 GET_MODE (operands[0]), false))
9348 operands[0]
9349 = replace_equiv_address (operands[0],
9350 copy_addr_to_reg (XEXP (operands[0], 0)));
9351
9352 if (MEM_P (operands[1])
9353 && !REG_P (XEXP (operands[1], 0))
9354 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9355 GET_MODE (operands[1]), false))
9356 operands[1]
9357 = replace_equiv_address (operands[1],
9358 copy_addr_to_reg (XEXP (operands[1], 0)));
9359 }
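/* For example, a TImode access through

	(mem:TI (plus (reg r9) (reg r10)))

   has its address forced into a fresh pseudo, giving
   (mem:TI (reg tmp)), because the value is moved with several
   word-sized loads or stores that need an offsettable address.
   The register names here are only illustrative.  */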
9360
9361 /* Generate a vector of constants to permute MODE for a little-endian
9362 storage operation by swapping the two halves of a vector. */
9363 static rtvec
9364 rs6000_const_vec (machine_mode mode)
9365 {
9366 int i, subparts;
9367 rtvec v;
9368
9369 switch (mode)
9370 {
9371 case E_V1TImode:
9372 subparts = 1;
9373 break;
9374 case E_V2DFmode:
9375 case E_V2DImode:
9376 subparts = 2;
9377 break;
9378 case E_V4SFmode:
9379 case E_V4SImode:
9380 subparts = 4;
9381 break;
9382 case E_V8HImode:
9383 subparts = 8;
9384 break;
9385 case E_V16QImode:
9386 subparts = 16;
9387 break;
9388 default:
9389 gcc_unreachable();
9390 }
9391
9392 v = rtvec_alloc (subparts);
9393
9394 for (i = 0; i < subparts / 2; ++i)
9395 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9396 for (i = subparts / 2; i < subparts; ++i)
9397 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9398
9399 return v;
9400 }
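/* For example, for V4SImode this returns the selector {2, 3, 0, 1},
   which swaps the two 64-bit halves of the vector, and for V16QImode
   it returns {8, 9, ..., 15, 0, 1, ..., 7}.  */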
9401
9402 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9403 store operation. */
9404 void
9405 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9406 {
9407 /* Scalar permutations are easier to express in integer modes rather than
9408 floating-point modes, so cast them here. We use V1TImode instead
9409 of TImode to ensure that the values don't go through GPRs. */
9410 if (FLOAT128_VECTOR_P (mode))
9411 {
9412 dest = gen_lowpart (V1TImode, dest);
9413 source = gen_lowpart (V1TImode, source);
9414 mode = V1TImode;
9415 }
9416
9417 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9418 scalar. */
9419 if (mode == TImode || mode == V1TImode)
9420 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9421 GEN_INT (64))));
9422 else
9423 {
9424 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9425 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9426 }
9427 }
9428
9429 /* Emit a little-endian load from vector memory location SOURCE to VSX
9430 register DEST in mode MODE. The load is done with two permuting
9431 insns that represent an lxvd2x and an xxpermdi.
9432 void
9433 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9434 {
9435 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9436 V1TImode). */
9437 if (mode == TImode || mode == V1TImode)
9438 {
9439 mode = V2DImode;
9440 dest = gen_lowpart (V2DImode, dest);
9441 source = adjust_address (source, V2DImode, 0);
9442 }
9443
9444 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9445 rs6000_emit_le_vsx_permute (tmp, source, mode);
9446 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9447 }
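/* A sketch of why two permutes are correct on little-endian: lxvd2x
   loads the vector with its two doublewords swapped, so a V4SImode
   value {0,1,2,3} in memory arrives as {2,3,0,1} in the register;
   the second permute (an xxpermdi) swaps the halves back, leaving
   {0,1,2,3}.  */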
9448
9449 /* Emit a little-endian store to vector memory location DEST from VSX
9450 register SOURCE in mode MODE. The store is done with two permuting
9451 insns that represent an xxpermdi and an stxvd2x.
9452 void
9453 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9454 {
9455 /* This should never be called during or after LRA, because it does
9456 not re-permute the source register. It is intended only for use
9457 during expand. */
9458 gcc_assert (!lra_in_progress && !reload_completed);
9459
9460 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9461 V1TImode). */
9462 if (mode == TImode || mode == V1TImode)
9463 {
9464 mode = V2DImode;
9465 dest = adjust_address (dest, V2DImode, 0);
9466 source = gen_lowpart (V2DImode, source);
9467 }
9468
9469 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9470 rs6000_emit_le_vsx_permute (tmp, source, mode);
9471 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9472 }
9473
9474 /* Emit a sequence representing a little-endian VSX load or store,
9475 moving data from SOURCE to DEST in mode MODE. This is done
9476 separately from rs6000_emit_move to ensure it is called only
9477 during expand. LE VSX loads and stores introduced later are
9478 handled with a split. The expand-time RTL generation allows
9479 us to optimize away redundant pairs of register-permutes. */
9480 void
9481 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9482 {
9483 gcc_assert (!BYTES_BIG_ENDIAN
9484 && VECTOR_MEM_VSX_P (mode)
9485 && !TARGET_P9_VECTOR
9486 && !gpr_or_gpr_p (dest, source)
9487 && (MEM_P (source) ^ MEM_P (dest)));
9488
9489 if (MEM_P (source))
9490 {
9491 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9492 rs6000_emit_le_vsx_load (dest, source, mode);
9493 }
9494 else
9495 {
9496 if (!REG_P (source))
9497 source = force_reg (mode, source);
9498 rs6000_emit_le_vsx_store (dest, source, mode);
9499 }
9500 }
9501
9502 /* Return whether a SFmode or SImode move can be done without converting one
9503 mode to another. This arises when we have:
9504
9505 (SUBREG:SF (REG:SI ...))
9506 (SUBREG:SI (REG:SF ...))
9507
9508 and one of the values is in a floating point/vector register, where SFmode
9509 scalars are stored in DFmode format. */
9510
9511 bool
9512 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9513 {
9514 if (TARGET_ALLOW_SF_SUBREG)
9515 return true;
9516
9517 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9518 return true;
9519
9520 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9521 return true;
9522
9523 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9524 if (SUBREG_P (dest))
9525 {
9526 rtx dest_subreg = SUBREG_REG (dest);
9527 rtx src_subreg = SUBREG_REG (src);
9528 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9529 }
9530
9531 return false;
9532 }
9533
9534
9535 /* Helper function to change moves with:
9536
9537 (SUBREG:SF (REG:SI)) and
9538 (SUBREG:SI (REG:SF))
9539
9540 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9541 values are stored as DFmode values in the VSX registers. We need to convert
9542 the bits before we can use a direct move or operate on the bits in the
9543 vector register as an integer type.
9544
9545 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
9546
9547 static bool
9548 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9549 {
9550 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9551 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9552 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9553 {
9554 rtx inner_source = SUBREG_REG (source);
9555 machine_mode inner_mode = GET_MODE (inner_source);
9556
9557 if (mode == SImode && inner_mode == SFmode)
9558 {
9559 emit_insn (gen_movsi_from_sf (dest, inner_source));
9560 return true;
9561 }
9562
9563 if (mode == SFmode && inner_mode == SImode)
9564 {
9565 emit_insn (gen_movsf_from_si (dest, inner_source));
9566 return true;
9567 }
9568 }
9569
9570 return false;
9571 }
9572
9573 /* Emit a move from SOURCE to DEST in mode MODE. */
9574 void
9575 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9576 {
9577 rtx operands[2];
9578 operands[0] = dest;
9579 operands[1] = source;
9580
9581 if (TARGET_DEBUG_ADDR)
9582 {
9583 fprintf (stderr,
9584 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9585 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9586 GET_MODE_NAME (mode),
9587 lra_in_progress,
9588 reload_completed,
9589 can_create_pseudo_p ());
9590 debug_rtx (dest);
9591 fprintf (stderr, "source:\n");
9592 debug_rtx (source);
9593 }
9594
9595 /* Check that we get CONST_WIDE_INT only when we should. */
9596 if (CONST_WIDE_INT_P (operands[1])
9597 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9598 gcc_unreachable ();
9599
9600 #ifdef HAVE_AS_GNU_ATTRIBUTE
9601 /* If we use a long double type, set the flags in .gnu_attribute that say
9602 what the long double type is. This is to allow the linker's warning
9603 message for the wrong long double to be useful, even if the function does
9604 not do a call (for example, doing a 128-bit add on power9 if the long
9605 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128 are
9606 used when they aren't the default long double type. */
9607 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9608 {
9609 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9610 rs6000_passes_float = rs6000_passes_long_double = true;
9611
9612 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9613 rs6000_passes_float = rs6000_passes_long_double = true;
9614 }
9615 #endif
9616
9617 /* See if we need to special case SImode/SFmode SUBREG moves. */
9618 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9619 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9620 return;
9621
9622 /* Check if GCC is setting up a block move that will end up using FP
9623 registers as temporaries. We must make sure this is acceptable. */
9624 if (MEM_P (operands[0])
9625 && MEM_P (operands[1])
9626 && mode == DImode
9627 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9628 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9629 && ! (rs6000_slow_unaligned_access (SImode,
9630 (MEM_ALIGN (operands[0]) > 32
9631 ? 32 : MEM_ALIGN (operands[0])))
9632 || rs6000_slow_unaligned_access (SImode,
9633 (MEM_ALIGN (operands[1]) > 32
9634 ? 32 : MEM_ALIGN (operands[1]))))
9635 && ! MEM_VOLATILE_P (operands [0])
9636 && ! MEM_VOLATILE_P (operands [1]))
9637 {
9638 emit_move_insn (adjust_address (operands[0], SImode, 0),
9639 adjust_address (operands[1], SImode, 0));
9640 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9641 adjust_address (copy_rtx (operands[1]), SImode, 4));
9642 return;
9643 }
9644
9645 if (can_create_pseudo_p () && MEM_P (operands[0])
9646 && !gpc_reg_operand (operands[1], mode))
9647 operands[1] = force_reg (mode, operands[1]);
9648
9649 /* Recognize the case where operand[1] is a reference to thread-local
9650 data and load its address to a register. */
9651 if (tls_referenced_p (operands[1]))
9652 {
9653 enum tls_model model;
9654 rtx tmp = operands[1];
9655 rtx addend = NULL;
9656
9657 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9658 {
9659 addend = XEXP (XEXP (tmp, 0), 1);
9660 tmp = XEXP (XEXP (tmp, 0), 0);
9661 }
9662
9663 gcc_assert (SYMBOL_REF_P (tmp));
9664 model = SYMBOL_REF_TLS_MODEL (tmp);
9665 gcc_assert (model != 0);
9666
9667 tmp = rs6000_legitimize_tls_address (tmp, model);
9668 if (addend)
9669 {
9670 tmp = gen_rtx_PLUS (mode, tmp, addend);
9671 tmp = force_operand (tmp, operands[0]);
9672 }
9673 operands[1] = tmp;
9674 }
9675
9676 /* 128-bit constant floating-point values on Darwin should really be loaded
9677 as two parts. However, this premature splitting is a problem when DFmode
9678 values can go into Altivec registers. */
9679 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9680 && !reg_addr[DFmode].scalar_in_vmx_p)
9681 {
9682 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9683 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9684 DFmode);
9685 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9686 GET_MODE_SIZE (DFmode)),
9687 simplify_gen_subreg (DFmode, operands[1], mode,
9688 GET_MODE_SIZE (DFmode)),
9689 DFmode);
9690 return;
9691 }
9692
9693 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9694 p1:SD) if p1 is not of floating point class and p0 is spilled as
9695 we can have no analogous movsd_store for this. */
9696 if (lra_in_progress && mode == DDmode
9697 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9698 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9699 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9700 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9701 {
9702 enum reg_class cl;
9703 int regno = REGNO (SUBREG_REG (operands[1]));
9704
9705 if (!HARD_REGISTER_NUM_P (regno))
9706 {
9707 cl = reg_preferred_class (regno);
9708 regno = reg_renumber[regno];
9709 if (regno < 0)
9710 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9711 }
9712 if (regno >= 0 && ! FP_REGNO_P (regno))
9713 {
9714 mode = SDmode;
9715 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9716 operands[1] = SUBREG_REG (operands[1]);
9717 }
9718 }
9719 if (lra_in_progress
9720 && mode == SDmode
9721 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9722 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9723 && (REG_P (operands[1])
9724 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9725 {
9726 int regno = reg_or_subregno (operands[1]);
9727 enum reg_class cl;
9728
9729 if (!HARD_REGISTER_NUM_P (regno))
9730 {
9731 cl = reg_preferred_class (regno);
9732 gcc_assert (cl != NO_REGS);
9733 regno = reg_renumber[regno];
9734 if (regno < 0)
9735 regno = ira_class_hard_regs[cl][0];
9736 }
9737 if (FP_REGNO_P (regno))
9738 {
9739 if (GET_MODE (operands[0]) != DDmode)
9740 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9741 emit_insn (gen_movsd_store (operands[0], operands[1]));
9742 }
9743 else if (INT_REGNO_P (regno))
9744 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9745 else
9746 gcc_unreachable();
9747 return;
9748 }
9749 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9750 p1:DD)) if p0 is not of floating point class and p1 is spilled,
9751 since there is no analogous movsd_load for this case. */
9752 if (lra_in_progress && mode == DDmode
9753 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9754 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9755 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9756 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9757 {
9758 enum reg_class cl;
9759 int regno = REGNO (SUBREG_REG (operands[0]));
9760
9761 if (!HARD_REGISTER_NUM_P (regno))
9762 {
9763 cl = reg_preferred_class (regno);
9764 regno = reg_renumber[regno];
9765 if (regno < 0)
9766 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9767 }
9768 if (regno >= 0 && ! FP_REGNO_P (regno))
9769 {
9770 mode = SDmode;
9771 operands[0] = SUBREG_REG (operands[0]);
9772 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9773 }
9774 }
9775 if (lra_in_progress
9776 && mode == SDmode
9777 && (REG_P (operands[0])
9778 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9779 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9780 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9781 {
9782 int regno = reg_or_subregno (operands[0]);
9783 enum reg_class cl;
9784
9785 if (!HARD_REGISTER_NUM_P (regno))
9786 {
9787 cl = reg_preferred_class (regno);
9788 gcc_assert (cl != NO_REGS);
9789 regno = reg_renumber[regno];
9790 if (regno < 0)
9791 regno = ira_class_hard_regs[cl][0];
9792 }
9793 if (FP_REGNO_P (regno))
9794 {
9795 if (GET_MODE (operands[1]) != DDmode)
9796 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9797 emit_insn (gen_movsd_load (operands[0], operands[1]));
9798 }
9799 else if (INT_REGNO_P (regno))
9800 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9801 else
9802 gcc_unreachable();
9803 return;
9804 }
9805
9806 /* FIXME: In the long term, this switch statement should go away
9807 and be replaced by a sequence of tests based on things like
9808 mode == Pmode. */
9809 switch (mode)
9810 {
9811 case E_HImode:
9812 case E_QImode:
9813 if (CONSTANT_P (operands[1])
9814 && !CONST_INT_P (operands[1]))
9815 operands[1] = force_const_mem (mode, operands[1]);
9816 break;
9817
9818 case E_TFmode:
9819 case E_TDmode:
9820 case E_IFmode:
9821 case E_KFmode:
9822 if (FLOAT128_2REG_P (mode))
9823 rs6000_eliminate_indexed_memrefs (operands);
9824 /* fall through */
9825
9826 case E_DFmode:
9827 case E_DDmode:
9828 case E_SFmode:
9829 case E_SDmode:
9830 if (CONSTANT_P (operands[1])
9831 && ! easy_fp_constant (operands[1], mode))
9832 operands[1] = force_const_mem (mode, operands[1]);
9833 break;
9834
9835 case E_V16QImode:
9836 case E_V8HImode:
9837 case E_V4SFmode:
9838 case E_V4SImode:
9839 case E_V2DFmode:
9840 case E_V2DImode:
9841 case E_V1TImode:
9842 if (CONSTANT_P (operands[1])
9843 && !easy_vector_constant (operands[1], mode))
9844 operands[1] = force_const_mem (mode, operands[1]);
9845 break;
9846
9847 case E_SImode:
9848 case E_DImode:
9849 /* Use the default pattern for the address of ELF small data. */
9850 if (TARGET_ELF
9851 && mode == Pmode
9852 && DEFAULT_ABI == ABI_V4
9853 && (SYMBOL_REF_P (operands[1])
9854 || GET_CODE (operands[1]) == CONST)
9855 && small_data_operand (operands[1], mode))
9856 {
9857 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9858 return;
9859 }
9860
9861 /* Use the default pattern for loading up PC-relative addresses. */
9862 if (TARGET_PCREL && mode == Pmode
9863 && pcrel_local_or_external_address (operands[1], Pmode))
9864 {
9865 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9866 return;
9867 }
9868
9869 if (DEFAULT_ABI == ABI_V4
9870 && mode == Pmode && mode == SImode
9871 && flag_pic == 1 && got_operand (operands[1], mode))
9872 {
9873 emit_insn (gen_movsi_got (operands[0], operands[1]));
9874 return;
9875 }
9876
9877 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9878 && TARGET_NO_TOC_OR_PCREL
9879 && ! flag_pic
9880 && mode == Pmode
9881 && CONSTANT_P (operands[1])
9882 && GET_CODE (operands[1]) != HIGH
9883 && !CONST_INT_P (operands[1]))
9884 {
9885 rtx target = (!can_create_pseudo_p ()
9886 ? operands[0]
9887 : gen_reg_rtx (mode));
9888
9889 /* If this is a function address on -mcall-aixdesc,
9890 convert it to the address of the descriptor. */
9891 if (DEFAULT_ABI == ABI_AIX
9892 && SYMBOL_REF_P (operands[1])
9893 && XSTR (operands[1], 0)[0] == '.')
9894 {
9895 const char *name = XSTR (operands[1], 0);
9896 rtx new_ref;
9897 while (*name == '.')
9898 name++;
9899 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9900 CONSTANT_POOL_ADDRESS_P (new_ref)
9901 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9902 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9903 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9904 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9905 operands[1] = new_ref;
9906 }
9907
9908 if (DEFAULT_ABI == ABI_DARWIN)
9909 {
9910 #if TARGET_MACHO
9911 /* This is not PIC code, but could require the subset of
9912 indirections used by mdynamic-no-pic. */
9913 if (MACHO_DYNAMIC_NO_PIC_P)
9914 {
9915 /* Take care of any required data indirection. */
9916 operands[1] = rs6000_machopic_legitimize_pic_address (
9917 operands[1], mode, operands[0]);
9918 if (operands[0] != operands[1])
9919 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9920 return;
9921 }
9922 #endif
9923 emit_insn (gen_macho_high (Pmode, target, operands[1]));
9924 emit_insn (gen_macho_low (Pmode, operands[0],
9925 target, operands[1]));
9926 return;
9927 }
9928
9929 emit_insn (gen_elf_high (target, operands[1]));
9930 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9931 return;
9932 }
9933
9934 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9935 and we have put it in the TOC, we just need to make a TOC-relative
9936 reference to it. */
9937 if (TARGET_TOC
9938 && SYMBOL_REF_P (operands[1])
9939 && use_toc_relative_ref (operands[1], mode))
9940 operands[1] = create_TOC_reference (operands[1], operands[0]);
9941 else if (mode == Pmode
9942 && CONSTANT_P (operands[1])
9943 && GET_CODE (operands[1]) != HIGH
9944 && ((REG_P (operands[0])
9945 && FP_REGNO_P (REGNO (operands[0])))
9946 || !CONST_INT_P (operands[1])
9947 || (num_insns_constant (operands[1], mode)
9948 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9949 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
9950 && (TARGET_CMODEL == CMODEL_SMALL
9951 || can_create_pseudo_p ()
9952 || (REG_P (operands[0])
9953 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9954 {
9955
9956 #if TARGET_MACHO
9957 /* Darwin uses a special PIC legitimizer. */
9958 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9959 {
9960 operands[1] =
9961 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9962 operands[0]);
9963 if (operands[0] != operands[1])
9964 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9965 return;
9966 }
9967 #endif
9968
9969 /* If we are to limit the number of things we put in the TOC and
9970 this is a symbol plus a constant we can add in one insn,
9971 just put the symbol in the TOC and add the constant. */
9972 if (GET_CODE (operands[1]) == CONST
9973 && TARGET_NO_SUM_IN_TOC
9974 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9975 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9976 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9977 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
9978 && ! side_effects_p (operands[0]))
9979 {
9980 rtx sym =
9981 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9982 rtx other = XEXP (XEXP (operands[1], 0), 1);
9983
9984 sym = force_reg (mode, sym);
9985 emit_insn (gen_add3_insn (operands[0], sym, other));
9986 return;
9987 }
9988
9989 operands[1] = force_const_mem (mode, operands[1]);
9990
9991 if (TARGET_TOC
9992 && SYMBOL_REF_P (XEXP (operands[1], 0))
9993 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
9994 {
9995 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9996 operands[0]);
9997 operands[1] = gen_const_mem (mode, tocref);
9998 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9999 }
10000 }
10001 break;
10002
10003 case E_TImode:
10004 if (!VECTOR_MEM_VSX_P (TImode))
10005 rs6000_eliminate_indexed_memrefs (operands);
10006 break;
10007
10008 case E_PTImode:
10009 rs6000_eliminate_indexed_memrefs (operands);
10010 break;
10011
10012 default:
10013 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10014 }
10015
10016 /* Above, we may have called force_const_mem which may have returned
10017 an invalid address. If we can, fix this up; otherwise, reload will
10018 have to deal with it. */
10019 if (MEM_P (operands[1]))
10020 operands[1] = validize_mem (operands[1]);
10021
10022 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10023 }
10024 \f
10025
10026 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10027 static void
10028 init_float128_ibm (machine_mode mode)
10029 {
10030 if (!TARGET_XL_COMPAT)
10031 {
10032 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10033 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10034 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10035 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10036
10037 if (!TARGET_HARD_FLOAT)
10038 {
10039 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10040 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10041 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10042 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10043 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10044 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10045 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10046 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10047
10048 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10049 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10050 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10051 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10052 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10053 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10054 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10055 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10056 }
10057 }
10058 else
10059 {
10060 set_optab_libfunc (add_optab, mode, "_xlqadd");
10061 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10062 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10063 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10064 }
10065
10066 /* Add various conversions for IFmode to use the traditional TFmode
10067 names. */
10068 if (mode == IFmode)
10069 {
10070 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10071 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10072 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10073 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10074 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10075 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10076
10077 if (TARGET_POWERPC64)
10078 {
10079 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10080 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10081 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10082 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
10083 }
10084 }
10085 }
10086
10087 /* Create a decl for either complex long double multiply or complex long double
10088 divide when long double is IEEE 128-bit floating point. We can't use
10089 __multc3 and __divtc3 because those names were used by the original
10090 IBM extended double long double. The complex multiply/divide functions are encoded
10091 as builtin functions with a complex result and 4 scalar inputs. */
10092
10093 static void
10094 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10095 {
10096 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10097 name, NULL_TREE);
10098
10099 set_builtin_decl (fncode, fndecl, true);
10100
10101 if (TARGET_DEBUG_BUILTIN)
10102 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10103
10104 return;
10105 }
10106
10107 /* Set up IEEE 128-bit floating point routines. Use different names if the
10108 arguments can be passed in a vector register. The historical PowerPC
10109 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10110 continue to use that if we aren't using vector registers to pass IEEE
10111 128-bit floating point. */
10112
10113 static void
10114 init_float128_ieee (machine_mode mode)
10115 {
10116 if (FLOAT128_VECTOR_P (mode))
10117 {
10118 static bool complex_muldiv_init_p = false;
10119
10120 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10121 we have clone or target attributes, this will be called a second
10122 time. We want to create the built-in function only once. */
10123 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10124 {
10125 complex_muldiv_init_p = true;
10126 built_in_function fncode_mul =
10127 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10128 - MIN_MODE_COMPLEX_FLOAT);
10129 built_in_function fncode_div =
10130 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10131 - MIN_MODE_COMPLEX_FLOAT);
10132
10133 tree fntype = build_function_type_list (complex_long_double_type_node,
10134 long_double_type_node,
10135 long_double_type_node,
10136 long_double_type_node,
10137 long_double_type_node,
10138 NULL_TREE);
10139
10140 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
10141 create_complex_muldiv ("__divkc3", fncode_div, fntype);
10142 }
10143
10144 set_optab_libfunc (add_optab, mode, "__addkf3");
10145 set_optab_libfunc (sub_optab, mode, "__subkf3");
10146 set_optab_libfunc (neg_optab, mode, "__negkf2");
10147 set_optab_libfunc (smul_optab, mode, "__mulkf3");
10148 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
10149 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
10150 set_optab_libfunc (abs_optab, mode, "__abskf2");
10151 set_optab_libfunc (powi_optab, mode, "__powikf2");
10152
10153 set_optab_libfunc (eq_optab, mode, "__eqkf2");
10154 set_optab_libfunc (ne_optab, mode, "__nekf2");
10155 set_optab_libfunc (gt_optab, mode, "__gtkf2");
10156 set_optab_libfunc (ge_optab, mode, "__gekf2");
10157 set_optab_libfunc (lt_optab, mode, "__ltkf2");
10158 set_optab_libfunc (le_optab, mode, "__lekf2");
10159 set_optab_libfunc (unord_optab, mode, "__unordkf2");
10160
10161 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
10162 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
10163 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
10164 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
10165
10166 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
10167 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10168 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
10169
10170 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
10171 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10172 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
10173
10174 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
10175 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
10176 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
10177 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
10178 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
10179 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
10180
10181 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
10182 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
10183 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
10184 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
10185
10186 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
10187 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
10188 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
10189 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
10190
10191 if (TARGET_POWERPC64)
10192 {
10193 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
10194 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
10195 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
10196 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
10197 }
10198 }
10199
10200 else
10201 {
10202 set_optab_libfunc (add_optab, mode, "_q_add");
10203 set_optab_libfunc (sub_optab, mode, "_q_sub");
10204 set_optab_libfunc (neg_optab, mode, "_q_neg");
10205 set_optab_libfunc (smul_optab, mode, "_q_mul");
10206 set_optab_libfunc (sdiv_optab, mode, "_q_div");
10207 if (TARGET_PPC_GPOPT)
10208 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
10209
10210 set_optab_libfunc (eq_optab, mode, "_q_feq");
10211 set_optab_libfunc (ne_optab, mode, "_q_fne");
10212 set_optab_libfunc (gt_optab, mode, "_q_fgt");
10213 set_optab_libfunc (ge_optab, mode, "_q_fge");
10214 set_optab_libfunc (lt_optab, mode, "_q_flt");
10215 set_optab_libfunc (le_optab, mode, "_q_fle");
10216
10217 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
10218 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
10219 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
10220 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
10221 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
10222 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
10223 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
10224 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
10225 }
10226 }
10227
10228 static void
10229 rs6000_init_libfuncs (void)
10230 {
10231 /* __float128 support. */
10232 if (TARGET_FLOAT128_TYPE)
10233 {
10234 init_float128_ibm (IFmode);
10235 init_float128_ieee (KFmode);
10236 }
10237
10238 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10239 if (TARGET_LONG_DOUBLE_128)
10240 {
10241 if (!TARGET_IEEEQUAD)
10242 init_float128_ibm (TFmode);
10243
10244 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10245 else
10246 init_float128_ieee (TFmode);
10247 }
10248 }
10249
10250 /* Emit a potentially record-form instruction, setting DST from SRC.
10251 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10252 signed comparison of DST with zero. If DOT is 1, the generated RTL
10253 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10254 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10255 a separate COMPARE. */
10256
10257 void
10258 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10259 {
10260 if (dot == 0)
10261 {
10262 emit_move_insn (dst, src);
10263 return;
10264 }
10265
10266 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10267 {
10268 emit_move_insn (dst, src);
10269 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10270 return;
10271 }
10272
10273 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10274 if (dot == 1)
10275 {
10276 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10277 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10278 }
10279 else
10280 {
10281 rtx set = gen_rtx_SET (dst, src);
10282 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
10283 }
10284 }
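/* For reference, the PARALLELs generated above have the shapes

	dot == 1:  (parallel [(set (reg:CC 0)
				   (compare:CC (src) (const_int 0)))
			      (clobber (dst))])
	dot == 2:  (parallel [(set (reg:CC 0)
				   (compare:CC (src) (const_int 0)))
			      (set (dst) (src))])

   which match the record-form ("dot") patterns such as "add." that
   set CR0 as a side effect of the arithmetic.  */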
10285
10286 \f
10287 /* A validation routine: say whether CODE, a condition code, and MODE
10288 match. The other alternatives either don't make sense or should
10289 never be generated. */
10290
10291 void
10292 validate_condition_mode (enum rtx_code code, machine_mode mode)
10293 {
10294 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
10295 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
10296 && GET_MODE_CLASS (mode) == MODE_CC);
10297
10298 /* These don't make sense. */
10299 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
10300 || mode != CCUNSmode);
10301
10302 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
10303 || mode == CCUNSmode);
10304
10305 gcc_assert (mode == CCFPmode
10306 || (code != ORDERED && code != UNORDERED
10307 && code != UNEQ && code != LTGT
10308 && code != UNGT && code != UNLT
10309 && code != UNGE && code != UNLE));
10310
10311 /* These are invalid; the information is not there. */
10312 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
10313 }
10314
10315 \f
10316 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10317 rldicl, rldicr, or rldic instruction in mode MODE. If so, and E is
10318 non-null, store there the bit offset (counted from the right) where
10319 the single stretch of 1 bits begins; similarly for B, the bit
10320 offset where it ends.
10321
10322 bool
10323 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
10324 {
10325 unsigned HOST_WIDE_INT val = INTVAL (mask);
10326 unsigned HOST_WIDE_INT bit;
10327 int nb, ne;
10328 int n = GET_MODE_PRECISION (mode);
10329
10330 if (mode != DImode && mode != SImode)
10331 return false;
10332
10333 if (INTVAL (mask) >= 0)
10334 {
10335 bit = val & -val;
10336 ne = exact_log2 (bit);
10337 nb = exact_log2 (val + bit);
10338 }
10339 else if (val + 1 == 0)
10340 {
10341 nb = n;
10342 ne = 0;
10343 }
10344 else if (val & 1)
10345 {
10346 val = ~val;
10347 bit = val & -val;
10348 nb = exact_log2 (bit);
10349 ne = exact_log2 (val + bit);
10350 }
10351 else
10352 {
10353 bit = val & -val;
10354 ne = exact_log2 (bit);
10355 if (val + bit == 0)
10356 nb = n;
10357 else
10358 nb = 0;
10359 }
10360
10361 nb--;
10362
10363 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
10364 return false;
10365
10366 if (b)
10367 *b = nb;
10368 if (e)
10369 *e = ne;
10370
10371 return true;
10372 }
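/* Worked SImode examples for the function above:

	MASK 0x00ffff00: a single stretch of 1 bits running from
	bit 8 through bit 23, so *E = 8 and *B = 23.

	MASK 0xff0000ff: the stretch wraps around, beginning at
	bit 24 and ending at bit 7, so *E = 24 and *B = 7.  */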
10373
10374 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10375 or rldicr instruction, to implement an AND with it in mode MODE. */
10376
10377 bool
10378 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10379 {
10380 int nb, ne;
10381
10382 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10383 return false;
10384
10385 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10386 does not wrap. */
10387 if (mode == DImode)
10388 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10389
10390 /* For SImode, rlwinm can do everything. */
10391 if (mode == SImode)
10392 return (nb < 32 && ne < 32);
10393
10394 return false;
10395 }
10396
10397 /* Return the instruction template for an AND with mask in mode MODE, with
10398 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10399
10400 const char *
10401 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10402 {
10403 int nb, ne;
10404
10405 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10406 gcc_unreachable ();
10407
10408 if (mode == DImode && ne == 0)
10409 {
10410 operands[3] = GEN_INT (63 - nb);
10411 if (dot)
10412 return "rldicl. %0,%1,0,%3";
10413 return "rldicl %0,%1,0,%3";
10414 }
10415
10416 if (mode == DImode && nb == 63)
10417 {
10418 operands[3] = GEN_INT (63 - ne);
10419 if (dot)
10420 return "rldicr. %0,%1,0,%3";
10421 return "rldicr %0,%1,0,%3";
10422 }
10423
10424 if (nb < 32 && ne < 32)
10425 {
10426 operands[3] = GEN_INT (31 - nb);
10427 operands[4] = GEN_INT (31 - ne);
10428 if (dot)
10429 return "rlwinm. %0,%1,0,%3,%4";
10430 return "rlwinm %0,%1,0,%3,%4";
10431 }
10432
10433 gcc_unreachable ();
10434 }
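
/* For instance, an AND with 0xff in DImode has ne = 0 and nb = 7, so the
   first case above emits "rldicl %0,%1,0,56", which keeps the low 8 bits;
   an AND with 0x0ff0 in SImode has nb = 11 and ne = 4 and emits
   "rlwinm %0,%1,0,20,27".  */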
10435
10436 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10437 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10438 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
10439
10440 bool
10441 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
10442 {
10443 int nb, ne;
10444
10445 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10446 return false;
10447
10448 int n = GET_MODE_PRECISION (mode);
10449 int sh = -1;
10450
10451 if (CONST_INT_P (XEXP (shift, 1)))
10452 {
10453 sh = INTVAL (XEXP (shift, 1));
10454 if (sh < 0 || sh >= n)
10455 return false;
10456 }
10457
10458 rtx_code code = GET_CODE (shift);
10459
10460   /* Convert any shift by 0 to a rotate, to simplify the code below.  */
10461 if (sh == 0)
10462 code = ROTATE;
10463
10464 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10465 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10466 code = ASHIFT;
10467 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10468 {
10469 code = LSHIFTRT;
10470 sh = n - sh;
10471 }
10472
10473 /* DImode rotates need rld*. */
10474 if (mode == DImode && code == ROTATE)
10475 return (nb == 63 || ne == 0 || ne == sh);
10476
10477 /* SImode rotates need rlw*. */
10478 if (mode == SImode && code == ROTATE)
10479 return (nb < 32 && ne < 32 && sh < 32);
10480
10481 /* Wrap-around masks are only okay for rotates. */
10482 if (ne > nb)
10483 return false;
10484
10485 /* Variable shifts are only okay for rotates. */
10486 if (sh < 0)
10487 return false;
10488
10489 /* Don't allow ASHIFT if the mask is wrong for that. */
10490 if (code == ASHIFT && ne < sh)
10491 return false;
10492
10493 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10494 if the mask is wrong for that. */
10495 if (nb < 32 && ne < 32 && sh < 32
10496 && !(code == LSHIFTRT && nb >= 32 - sh))
10497 return true;
10498
10499 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10500 if the mask is wrong for that. */
10501 if (code == LSHIFTRT)
10502 sh = 64 - sh;
10503 if (nb == 63 || ne == 0 || ne == sh)
10504 return !(code == LSHIFTRT && nb >= sh);
10505
10506 return false;
10507 }
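
/* For example, (x << 8) & 0xffffff00 in SImode is valid here (nb = 31,
   ne = 8, sh = 8, so the ASHIFT check passes and the rlw* case applies;
   the combined operation is just slwi), whereas (x >> 8) & 0xffffff00 in
   SImode is rejected because the mask covers bits that the logical right
   shift has already cleared.  */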
10508
10509 /* Return the instruction template for a shift with mask in mode MODE, with
10510 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10511
10512 const char *
10513 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
10514 {
10515 int nb, ne;
10516
10517 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10518 gcc_unreachable ();
10519
10520 if (mode == DImode && ne == 0)
10521 {
10522 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10523 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
10524 operands[3] = GEN_INT (63 - nb);
10525 if (dot)
10526 return "rld%I2cl. %0,%1,%2,%3";
10527 return "rld%I2cl %0,%1,%2,%3";
10528 }
10529
10530 if (mode == DImode && nb == 63)
10531 {
10532 operands[3] = GEN_INT (63 - ne);
10533 if (dot)
10534 return "rld%I2cr. %0,%1,%2,%3";
10535 return "rld%I2cr %0,%1,%2,%3";
10536 }
10537
10538 if (mode == DImode
10539 && GET_CODE (operands[4]) != LSHIFTRT
10540 && CONST_INT_P (operands[2])
10541 && ne == INTVAL (operands[2]))
10542 {
10543 operands[3] = GEN_INT (63 - nb);
10544 if (dot)
10545 return "rld%I2c. %0,%1,%2,%3";
10546 return "rld%I2c %0,%1,%2,%3";
10547 }
10548
10549 if (nb < 32 && ne < 32)
10550 {
10551 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10552 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10553 operands[3] = GEN_INT (31 - nb);
10554 operands[4] = GEN_INT (31 - ne);
10555       /* This insn can also be a 64-bit rotate with mask that really makes
10556 	 it just a shift right (with mask); the %h below is to adjust for
10557 	 that situation (the shift count is >= 32 in that case).  */
10558 if (dot)
10559 return "rlw%I2nm. %0,%1,%h2,%3,%4";
10560 return "rlw%I2nm %0,%1,%h2,%3,%4";
10561 }
10562
10563 gcc_unreachable ();
10564 }
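
/* For instance, a DImode shift left by 16 under the mask
   0xffffffffffff0000 has nb = 63, so the second case above emits
   "rldicr %0,%1,16,47", the underlying form of the sldi mnemonic.  */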
10565
10566 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
10567 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
10568 ASHIFT, or LSHIFTRT) in mode MODE. */
10569
10570 bool
10571 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
10572 {
10573 int nb, ne;
10574
10575 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10576 return false;
10577
10578 int n = GET_MODE_PRECISION (mode);
10579
10580 int sh = INTVAL (XEXP (shift, 1));
10581 if (sh < 0 || sh >= n)
10582 return false;
10583
10584 rtx_code code = GET_CODE (shift);
10585
10586   /* Convert any shift by 0 to a rotate, to simplify the code below.  */
10587 if (sh == 0)
10588 code = ROTATE;
10589
10590 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10591 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10592 code = ASHIFT;
10593 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10594 {
10595 code = LSHIFTRT;
10596 sh = n - sh;
10597 }
10598
10599 /* DImode rotates need rldimi. */
10600 if (mode == DImode && code == ROTATE)
10601 return (ne == sh);
10602
10603 /* SImode rotates need rlwimi. */
10604 if (mode == SImode && code == ROTATE)
10605 return (nb < 32 && ne < 32 && sh < 32);
10606
10607 /* Wrap-around masks are only okay for rotates. */
10608 if (ne > nb)
10609 return false;
10610
10611 /* Don't allow ASHIFT if the mask is wrong for that. */
10612 if (code == ASHIFT && ne < sh)
10613 return false;
10614
10615 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
10616 if the mask is wrong for that. */
10617 if (nb < 32 && ne < 32 && sh < 32
10618 && !(code == LSHIFTRT && nb >= 32 - sh))
10619 return true;
10620
10621 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
10622 if the mask is wrong for that. */
10623 if (code == LSHIFTRT)
10624 sh = 64 - sh;
10625 if (ne == sh)
10626 return !(code == LSHIFTRT && nb >= sh);
10627
10628 return false;
10629 }
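
/* As an example, inserting one register's low word into another's high
   word, i.e. (x << 32) under mask 0xffffffff00000000 in DImode, is valid
   here: nb = 63, ne = 32, sh = 32, so the final rldimi test (ne == sh)
   holds.  */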
10630
10631 /* Return the instruction template for an insert with mask in mode MODE, with
10632 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10633
10634 const char *
10635 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
10636 {
10637 int nb, ne;
10638
10639 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10640 gcc_unreachable ();
10641
10642 /* Prefer rldimi because rlwimi is cracked. */
10643 if (TARGET_POWERPC64
10644 && (!dot || mode == DImode)
10645 && GET_CODE (operands[4]) != LSHIFTRT
10646 && ne == INTVAL (operands[2]))
10647 {
10648 operands[3] = GEN_INT (63 - nb);
10649 if (dot)
10650 return "rldimi. %0,%1,%2,%3";
10651 return "rldimi %0,%1,%2,%3";
10652 }
10653
10654 if (nb < 32 && ne < 32)
10655 {
10656 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10657 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10658 operands[3] = GEN_INT (31 - nb);
10659 operands[4] = GEN_INT (31 - ne);
10660 if (dot)
10661 return "rlwimi. %0,%1,%2,%3,%4";
10662 return "rlwimi %0,%1,%2,%3,%4";
10663 }
10664
10665 gcc_unreachable ();
10666 }
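
/* Continuing the example above, with operands[2] = 32 and mask
   0xffffffff00000000 (nb = 63, ne = 32), the rldimi case emits
   "rldimi %0,%1,32,0", replacing the high 32 bits of %0 with the low
   word of %1.  */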
10667
10668 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
10669 using two machine instructions. */
10670
10671 bool
10672 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
10673 {
10674 /* There are two kinds of AND we can handle with two insns:
10675      1) those we can do with two rl* insns;
10676 2) ori[s];xori[s].
10677
10678 We do not handle that last case yet. */
10679
10680 /* If there is just one stretch of ones, we can do it. */
10681 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
10682 return true;
10683
10684 /* Otherwise, fill in the lowest "hole"; if we can do the result with
10685 one insn, we can do the whole thing with two. */
10686 unsigned HOST_WIDE_INT val = INTVAL (c);
10687 unsigned HOST_WIDE_INT bit1 = val & -val;
10688 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10689 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10690 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10691 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
10692 }
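
/* A worked example of the hole-filling arithmetic above: for C = 0x0f0f,
   bit1 = 0x0001 (lowest set bit), bit2 = 0x0010 (lowest bit of the hole),
   bit3 = 0x0100 (lowest set bit above the hole), so val + bit3 - bit2
   = 0x0fff, the original value with its lowest hole filled in.  0x0fff is
   a valid single and-mask, so the AND can be done in two insns.  */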
10693
10694 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
10695 If EXPAND is true, split rotate-and-mask instructions we generate to
10696 their constituent parts as well (this is used during expand); if DOT
10697 is 1, make the last insn a record-form instruction clobbering the
10698 destination GPR and setting the CC reg (from operands[3]); if 2, set
10699 that GPR as well as the CC reg. */
10700
10701 void
10702 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
10703 {
10704 gcc_assert (!(expand && dot));
10705
10706 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
10707
10708 /* If it is one stretch of ones, it is DImode; shift left, mask, then
10709 shift right. This generates better code than doing the masks without
10710 shifts, or shifting first right and then left. */
10711 int nb, ne;
10712 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
10713 {
10714 gcc_assert (mode == DImode);
10715
10716 int shift = 63 - nb;
10717 if (expand)
10718 {
10719 rtx tmp1 = gen_reg_rtx (DImode);
10720 rtx tmp2 = gen_reg_rtx (DImode);
10721 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
10722 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
10723 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
10724 }
10725 else
10726 {
10727 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
10728 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
10729 emit_move_insn (operands[0], tmp);
10730 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
10731 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10732 }
10733 return;
10734 }
10735
10736 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
10737 that does the rest. */
10738 unsigned HOST_WIDE_INT bit1 = val & -val;
10739 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10740 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10741 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10742
10743 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
10744 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
10745
10746 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
10747
10748 /* Two "no-rotate"-and-mask instructions, for SImode. */
10749 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
10750 {
10751 gcc_assert (mode == SImode);
10752
10753 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10754 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
10755 emit_move_insn (reg, tmp);
10756 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10757 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10758 return;
10759 }
10760
10761 gcc_assert (mode == DImode);
10762
10763 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
10764 insns; we have to do the first in SImode, because it wraps. */
10765 if (mask2 <= 0xffffffff
10766 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
10767 {
10768 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10769 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
10770 GEN_INT (mask1));
10771 rtx reg_low = gen_lowpart (SImode, reg);
10772 emit_move_insn (reg_low, tmp);
10773 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10774 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10775 return;
10776 }
10777
10778 /* Two rld* insns: rotate, clear the hole in the middle (which now is
10779 at the top end), rotate back and clear the other hole. */
10780 int right = exact_log2 (bit3);
10781 int left = 64 - right;
10782
10783 /* Rotate the mask too. */
10784 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
10785
10786 if (expand)
10787 {
10788 rtx tmp1 = gen_reg_rtx (DImode);
10789 rtx tmp2 = gen_reg_rtx (DImode);
10790 rtx tmp3 = gen_reg_rtx (DImode);
10791 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
10792 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
10793 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
10794 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
10795 }
10796 else
10797 {
10798 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
10799 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
10800 emit_move_insn (operands[0], tmp);
10801 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
10802 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
10803 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10804 }
10805 }
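
/* For the 0x0f0f example above in SImode, mask1 = 0xffffff0f (everything
   but the hole) and mask2 = 0x0fff, so this emits roughly
	rlwinm %0,%1,0,28,23
	rlwinm %0,%0,0,20,31
   with the second insn becoming rlwinm. in the record-form case.  */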
10806 \f
10807 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
10808    for lfq and stfq insns, iff the registers are hard registers.  */
10809
10810 int
10811 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
10812 {
10813 /* We might have been passed a SUBREG. */
10814 if (!REG_P (reg1) || !REG_P (reg2))
10815 return 0;
10816
10817   /* We might have been passed non-floating-point registers.  */
10818 if (!FP_REGNO_P (REGNO (reg1))
10819 || !FP_REGNO_P (REGNO (reg2)))
10820 return 0;
10821
10822 return (REGNO (reg1) == REGNO (reg2) - 1);
10823 }
10824
10825 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
10826 addr1 and addr2 must be in consecutive memory locations
10827 (addr2 == addr1 + 8). */
10828
10829 int
10830 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
10831 {
10832 rtx addr1, addr2;
10833 unsigned int reg1, reg2;
10834 int offset1, offset2;
10835
10836 /* The mems cannot be volatile. */
10837 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
10838 return 0;
10839
10840 addr1 = XEXP (mem1, 0);
10841 addr2 = XEXP (mem2, 0);
10842
10843 /* Extract an offset (if used) from the first addr. */
10844 if (GET_CODE (addr1) == PLUS)
10845 {
10846 /* If not a REG, return zero. */
10847 if (!REG_P (XEXP (addr1, 0)))
10848 return 0;
10849 else
10850 {
10851 reg1 = REGNO (XEXP (addr1, 0));
10852 /* The offset must be constant! */
10853 if (!CONST_INT_P (XEXP (addr1, 1)))
10854 return 0;
10855 offset1 = INTVAL (XEXP (addr1, 1));
10856 }
10857 }
10858 else if (!REG_P (addr1))
10859 return 0;
10860 else
10861 {
10862 reg1 = REGNO (addr1);
10863 /* This was a simple (mem (reg)) expression. Offset is 0. */
10864 offset1 = 0;
10865 }
10866
10867 /* And now for the second addr. */
10868 if (GET_CODE (addr2) == PLUS)
10869 {
10870 /* If not a REG, return zero. */
10871 if (!REG_P (XEXP (addr2, 0)))
10872 return 0;
10873 else
10874 {
10875 reg2 = REGNO (XEXP (addr2, 0));
10876 /* The offset must be constant. */
10877 if (!CONST_INT_P (XEXP (addr2, 1)))
10878 return 0;
10879 offset2 = INTVAL (XEXP (addr2, 1));
10880 }
10881 }
10882 else if (!REG_P (addr2))
10883 return 0;
10884 else
10885 {
10886 reg2 = REGNO (addr2);
10887 /* This was a simple (mem (reg)) expression. Offset is 0. */
10888 offset2 = 0;
10889 }
10890
10891 /* Both of these must have the same base register. */
10892 if (reg1 != reg2)
10893 return 0;
10894
10895 /* The offset for the second addr must be 8 more than the first addr. */
10896 if (offset2 != offset1 + 8)
10897 return 0;
10898
10899 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
10900 instructions. */
10901 return 1;
10902 }
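
/* For example, the pair 16(r3) and 24(r3) passes (same base register,
   offsets exactly 8 apart), while 16(r3) with 24(r4), or 16(r3) with
   20(r3), does not.  */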
10903 \f
10904 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  For SDmode values we
10905    need to use DDmode; in all other cases we can use the same mode.  */
10906 static machine_mode
10907 rs6000_secondary_memory_needed_mode (machine_mode mode)
10908 {
10909 if (lra_in_progress && mode == SDmode)
10910 return DDmode;
10911 return mode;
10912 }
10913
10914 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
10915 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
10916 only work on the traditional altivec registers, note if an altivec register
10917 was chosen. */
10918
10919 static enum rs6000_reg_type
10920 register_to_reg_type (rtx reg, bool *is_altivec)
10921 {
10922 HOST_WIDE_INT regno;
10923 enum reg_class rclass;
10924
10925 if (SUBREG_P (reg))
10926 reg = SUBREG_REG (reg);
10927
10928 if (!REG_P (reg))
10929 return NO_REG_TYPE;
10930
10931 regno = REGNO (reg);
10932 if (!HARD_REGISTER_NUM_P (regno))
10933 {
10934 if (!lra_in_progress && !reload_completed)
10935 return PSEUDO_REG_TYPE;
10936
10937 regno = true_regnum (reg);
10938 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
10939 return PSEUDO_REG_TYPE;
10940 }
10941
10942 gcc_assert (regno >= 0);
10943
10944 if (is_altivec && ALTIVEC_REGNO_P (regno))
10945 *is_altivec = true;
10946
10947 rclass = rs6000_regno_regclass[regno];
10948 return reg_class_to_reg_type[(int)rclass];
10949 }
10950
10951 /* Helper function to return the cost of adding a TOC entry address. */
10952
10953 static inline int
10954 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
10955 {
10956 int ret;
10957
10958 if (TARGET_CMODEL != CMODEL_SMALL)
10959 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
10960
10961 else
10962 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
10963
10964 return ret;
10965 }
10966
10967 /* Helper function for rs6000_secondary_reload to determine whether the memory
10968 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
10969 needs reloading. Return negative if the memory is not handled by the memory
10970 helper functions and to try a different reload method, 0 if no additional
10971 instructions are need, and positive to give the extra cost for the
10972 memory. */
10973
10974 static int
10975 rs6000_secondary_reload_memory (rtx addr,
10976 enum reg_class rclass,
10977 machine_mode mode)
10978 {
10979 int extra_cost = 0;
10980 rtx reg, and_arg, plus_arg0, plus_arg1;
10981 addr_mask_type addr_mask;
10982 const char *type = NULL;
10983 const char *fail_msg = NULL;
10984
10985 if (GPR_REG_CLASS_P (rclass))
10986 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
10987
10988 else if (rclass == FLOAT_REGS)
10989 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
10990
10991 else if (rclass == ALTIVEC_REGS)
10992 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
10993
10994 /* For the combined VSX_REGS, turn off Altivec AND -16. */
10995 else if (rclass == VSX_REGS)
10996 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
10997 & ~RELOAD_REG_AND_M16);
10998
10999   /* If the register allocator hasn't made up its mind yet on the register
11000      class to use, settle on some defaults.  */
11001 else if (rclass == NO_REGS)
11002 {
11003 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
11004 & ~RELOAD_REG_AND_M16);
11005
11006 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
11007 addr_mask &= ~(RELOAD_REG_INDEXED
11008 | RELOAD_REG_PRE_INCDEC
11009 | RELOAD_REG_PRE_MODIFY);
11010 }
11011
11012 else
11013 addr_mask = 0;
11014
11015 /* If the register isn't valid in this register class, just return now. */
11016 if ((addr_mask & RELOAD_REG_VALID) == 0)
11017 {
11018 if (TARGET_DEBUG_ADDR)
11019 {
11020 fprintf (stderr,
11021 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11022 "not valid in class\n",
11023 GET_MODE_NAME (mode), reg_class_names[rclass]);
11024 debug_rtx (addr);
11025 }
11026
11027 return -1;
11028 }
11029
11030 switch (GET_CODE (addr))
11031 {
11032       /* Does the register class support auto update forms for this mode?  We
11033 	 don't need a scratch register, since the powerpc only supports
11034 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11035 case PRE_INC:
11036 case PRE_DEC:
11037 reg = XEXP (addr, 0);
11038       if (!base_reg_operand (reg, GET_MODE (reg)))
11039 {
11040 fail_msg = "no base register #1";
11041 extra_cost = -1;
11042 }
11043
11044 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11045 {
11046 extra_cost = 1;
11047 type = "update";
11048 }
11049 break;
11050
11051 case PRE_MODIFY:
11052 reg = XEXP (addr, 0);
11053 plus_arg1 = XEXP (addr, 1);
11054 if (!base_reg_operand (reg, GET_MODE (reg))
11055 || GET_CODE (plus_arg1) != PLUS
11056 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11057 {
11058 fail_msg = "bad PRE_MODIFY";
11059 extra_cost = -1;
11060 }
11061
11062 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11063 {
11064 extra_cost = 1;
11065 type = "update";
11066 }
11067 break;
11068
11069 /* Do we need to simulate AND -16 to clear the bottom address bits used
11070 in VMX load/stores? Only allow the AND for vector sizes. */
11071 case AND:
11072 and_arg = XEXP (addr, 0);
11073 if (GET_MODE_SIZE (mode) != 16
11074 || !CONST_INT_P (XEXP (addr, 1))
11075 || INTVAL (XEXP (addr, 1)) != -16)
11076 {
11077 fail_msg = "bad Altivec AND #1";
11078 extra_cost = -1;
11079 }
11080
11081 if (rclass != ALTIVEC_REGS)
11082 {
11083 if (legitimate_indirect_address_p (and_arg, false))
11084 extra_cost = 1;
11085
11086 else if (legitimate_indexed_address_p (and_arg, false))
11087 extra_cost = 2;
11088
11089 else
11090 {
11091 fail_msg = "bad Altivec AND #2";
11092 extra_cost = -1;
11093 }
11094
11095 type = "and";
11096 }
11097 break;
11098
11099 /* If this is an indirect address, make sure it is a base register. */
11100 case REG:
11101 case SUBREG:
11102 if (!legitimate_indirect_address_p (addr, false))
11103 {
11104 extra_cost = 1;
11105 type = "move";
11106 }
11107 break;
11108
11109 /* If this is an indexed address, make sure the register class can handle
11110 indexed addresses for this mode. */
11111 case PLUS:
11112 plus_arg0 = XEXP (addr, 0);
11113 plus_arg1 = XEXP (addr, 1);
11114
11115 /* (plus (plus (reg) (constant)) (constant)) is generated during
11116 push_reload processing, so handle it now. */
11117 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11118 {
11119 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11120 {
11121 extra_cost = 1;
11122 type = "offset";
11123 }
11124 }
11125
11126 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11127 push_reload processing, so handle it now. */
11128 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11129 {
11130 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11131 {
11132 extra_cost = 1;
11133 type = "indexed #2";
11134 }
11135 }
11136
11137 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11138 {
11139 fail_msg = "no base register #2";
11140 extra_cost = -1;
11141 }
11142
11143 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11144 {
11145 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11146 || !legitimate_indexed_address_p (addr, false))
11147 {
11148 extra_cost = 1;
11149 type = "indexed";
11150 }
11151 }
11152
11153 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11154 && CONST_INT_P (plus_arg1))
11155 {
11156 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11157 {
11158 extra_cost = 1;
11159 type = "vector d-form offset";
11160 }
11161 }
11162
11163 /* Make sure the register class can handle offset addresses. */
11164 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11165 {
11166 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11167 {
11168 extra_cost = 1;
11169 type = "offset #2";
11170 }
11171 }
11172
11173 else
11174 {
11175 fail_msg = "bad PLUS";
11176 extra_cost = -1;
11177 }
11178
11179 break;
11180
11181 case LO_SUM:
11182 /* Quad offsets are restricted and can't handle normal addresses. */
11183 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11184 {
11185 extra_cost = -1;
11186 type = "vector d-form lo_sum";
11187 }
11188
11189 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11190 {
11191 fail_msg = "bad LO_SUM";
11192 extra_cost = -1;
11193 }
11194
11195 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11196 {
11197 extra_cost = 1;
11198 type = "lo_sum";
11199 }
11200 break;
11201
11202 /* Static addresses need to create a TOC entry. */
11203 case CONST:
11204 case SYMBOL_REF:
11205 case LABEL_REF:
11206 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11207 {
11208 extra_cost = -1;
11209 type = "vector d-form lo_sum #2";
11210 }
11211
11212 else
11213 {
11214 type = "address";
11215 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11216 }
11217 break;
11218
11219 /* TOC references look like offsetable memory. */
11220 case UNSPEC:
11221 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11222 {
11223 fail_msg = "bad UNSPEC";
11224 extra_cost = -1;
11225 }
11226
11227 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11228 {
11229 extra_cost = -1;
11230 type = "vector d-form lo_sum #3";
11231 }
11232
11233 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11234 {
11235 extra_cost = 1;
11236 type = "toc reference";
11237 }
11238 break;
11239
11240 default:
11241 {
11242 fail_msg = "bad address";
11243 extra_cost = -1;
11244 }
11245 }
11246
11247 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11248 {
11249 if (extra_cost < 0)
11250 fprintf (stderr,
11251 "rs6000_secondary_reload_memory error: mode = %s, "
11252 "class = %s, addr_mask = '%s', %s\n",
11253 GET_MODE_NAME (mode),
11254 reg_class_names[rclass],
11255 rs6000_debug_addr_mask (addr_mask, false),
11256 (fail_msg != NULL) ? fail_msg : "<bad address>");
11257
11258 else
11259 fprintf (stderr,
11260 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11261 "addr_mask = '%s', extra cost = %d, %s\n",
11262 GET_MODE_NAME (mode),
11263 reg_class_names[rclass],
11264 rs6000_debug_addr_mask (addr_mask, false),
11265 extra_cost,
11266 (type) ? type : "<none>");
11267
11268 debug_rtx (addr);
11269 }
11270
11271 return extra_cost;
11272 }
11273
11274 /* Helper function for rs6000_secondary_reload to return true if a move to a
11275    different register class is really a simple move.  */
11276
11277 static bool
11278 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11279 enum rs6000_reg_type from_type,
11280 machine_mode mode)
11281 {
11282 int size = GET_MODE_SIZE (mode);
11283
11284 /* Add support for various direct moves available. In this function, we only
11285 look at cases where we don't need any extra registers, and one or more
11286      simple move insns are issued.  Historically, small integers were not
11287      allowed in FPR/VSX registers.  Single precision binary floating point is
11288      not a simple move because we need to convert to the single precision memory layout.
11289 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11290 need special direct move handling, which we do not support yet. */
11291 if (TARGET_DIRECT_MOVE
11292 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11293 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11294 {
11295 if (TARGET_POWERPC64)
11296 {
11297 	  /* ISA 2.07: MTVSRD or MFVSRD.  */
11298 if (size == 8)
11299 return true;
11300
11301 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11302 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11303 return true;
11304 }
11305
11306 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11307 if (TARGET_P8_VECTOR)
11308 {
11309 if (mode == SImode)
11310 return true;
11311
11312 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11313 return true;
11314 }
11315
11316 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11317 if (mode == SDmode)
11318 return true;
11319 }
11320
11321 /* Move to/from SPR. */
11322 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11323 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11324 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11325 return true;
11326
11327 return false;
11328 }
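
/* So, for instance, a DImode move between a GPR and a VSX register on a
   64-bit ISA 2.07 target is "simple" (a single mtvsrd or mfvsrd), while
   a DFmode move on a 32-bit target is not, and must instead go through
   rs6000_secondary_reload_direct_move below with an extra scratch
   register.  */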
11329
11330 /* Direct move helper function for rs6000_secondary_reload; handle all of the
11331    special direct moves that involve allocating an extra register.  Return
11332    true if there is such a helper, filling in SRI with its insn code and
11333    extra cost, and false if not.  */
11334
11335 static bool
11336 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11337 enum rs6000_reg_type from_type,
11338 machine_mode mode,
11339 secondary_reload_info *sri,
11340 bool altivec_p)
11341 {
11342 bool ret = false;
11343 enum insn_code icode = CODE_FOR_nothing;
11344 int cost = 0;
11345 int size = GET_MODE_SIZE (mode);
11346
11347 if (TARGET_POWERPC64 && size == 16)
11348 {
11349       /* Handle moving 128-bit values from GPRs to VSX registers on
11350 ISA 2.07 (power8, power9) when running in 64-bit mode using
11351 XXPERMDI to glue the two 64-bit values back together. */
11352 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11353 {
11354 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11355 icode = reg_addr[mode].reload_vsx_gpr;
11356 }
11357
11358       /* Handle moving 128-bit values from VSX registers to GPRs on
11359 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11360 bottom 64-bit value. */
11361 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11362 {
11363 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11364 icode = reg_addr[mode].reload_gpr_vsx;
11365 }
11366 }
11367
11368 else if (TARGET_POWERPC64 && mode == SFmode)
11369 {
11370 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11371 {
11372 cost = 3; /* xscvdpspn, mfvsrd, and. */
11373 icode = reg_addr[mode].reload_gpr_vsx;
11374 }
11375
11376 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11377 {
11378 cost = 2; /* mtvsrz, xscvspdpn. */
11379 icode = reg_addr[mode].reload_vsx_gpr;
11380 }
11381 }
11382
11383 else if (!TARGET_POWERPC64 && size == 8)
11384 {
11385 /* Handle moving 64-bit values from GPRs to floating point registers on
11386 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11387 32-bit values back together. Altivec register classes must be handled
11388 specially since a different instruction is used, and the secondary
11389 reload support requires a single instruction class in the scratch
11390 register constraint. However, right now TFmode is not allowed in
11391 Altivec registers, so the pattern will never match. */
11392 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11393 {
11394 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11395 icode = reg_addr[mode].reload_fpr_gpr;
11396 }
11397 }
11398
11399 if (icode != CODE_FOR_nothing)
11400 {
11401 ret = true;
11402 if (sri)
11403 {
11404 sri->icode = icode;
11405 sri->extra_cost = cost;
11406 }
11407 }
11408
11409 return ret;
11410 }
11411
11412 /* Return whether a move between two register classes can be done either
11413 directly (simple move) or via a pattern that uses a single extra temporary
11414    (using ISA 2.07's direct move in this case).  */
11415
11416 static bool
11417 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11418 enum rs6000_reg_type from_type,
11419 machine_mode mode,
11420 secondary_reload_info *sri,
11421 bool altivec_p)
11422 {
11423 /* Fall back to load/store reloads if either type is not a register. */
11424 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11425 return false;
11426
11427 /* If we haven't allocated registers yet, assume the move can be done for the
11428 standard register types. */
11429 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11430 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11431 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11432 return true;
11433
11434   /* A move within the same set of registers is a simple move for
11435      non-specialized registers.  */
11436 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11437 return true;
11438
11439 /* Check whether a simple move can be done directly. */
11440 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11441 {
11442 if (sri)
11443 {
11444 sri->icode = CODE_FOR_nothing;
11445 sri->extra_cost = 0;
11446 }
11447 return true;
11448 }
11449
11450 /* Now check if we can do it in a few steps. */
11451 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11452 altivec_p);
11453 }
11454
11455 /* Inform reload about cases where moving X with a mode MODE to a register in
11456 RCLASS requires an extra scratch or immediate register. Return the class
11457 needed for the immediate register.
11458
11459 For VSX and Altivec, we may need a register to convert sp+offset into
11460 reg+sp.
11461
11462 For misaligned 64-bit gpr loads and stores we need a register to
11463 convert an offset address to indirect. */
11464
11465 static reg_class_t
11466 rs6000_secondary_reload (bool in_p,
11467 rtx x,
11468 reg_class_t rclass_i,
11469 machine_mode mode,
11470 secondary_reload_info *sri)
11471 {
11472 enum reg_class rclass = (enum reg_class) rclass_i;
11473 reg_class_t ret = ALL_REGS;
11474 enum insn_code icode;
11475 bool default_p = false;
11476 bool done_p = false;
11477
11478 /* Allow subreg of memory before/during reload. */
11479 bool memory_p = (MEM_P (x)
11480 || (!reload_completed && SUBREG_P (x)
11481 && MEM_P (SUBREG_REG (x))));
11482
11483 sri->icode = CODE_FOR_nothing;
11484 sri->t_icode = CODE_FOR_nothing;
11485 sri->extra_cost = 0;
11486 icode = ((in_p)
11487 ? reg_addr[mode].reload_load
11488 : reg_addr[mode].reload_store);
11489
11490 if (REG_P (x) || register_operand (x, mode))
11491 {
11492 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11493 bool altivec_p = (rclass == ALTIVEC_REGS);
11494 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11495
11496 if (!in_p)
11497 std::swap (to_type, from_type);
11498
11499 /* Can we do a direct move of some sort? */
11500 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11501 altivec_p))
11502 {
11503 icode = (enum insn_code)sri->icode;
11504 default_p = false;
11505 done_p = true;
11506 ret = NO_REGS;
11507 }
11508 }
11509
11510 /* Make sure 0.0 is not reloaded or forced into memory. */
11511 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
11512 {
11513 ret = NO_REGS;
11514 default_p = false;
11515 done_p = true;
11516 }
11517
11518   /* If this is a scalar floating point value and we want to load it into the
11519      traditional Altivec registers, move it via a traditional floating
11520      point register, unless we have D-form addressing.  Also make sure that
11521      non-zero constants use an FPR.  */
11522 if (!done_p && reg_addr[mode].scalar_in_vmx_p
11523 && !mode_supports_vmx_dform (mode)
11524 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
11525 && (memory_p || CONST_DOUBLE_P (x)))
11526 {
11527 ret = FLOAT_REGS;
11528 default_p = false;
11529 done_p = true;
11530 }
11531
11532 /* Handle reload of load/stores if we have reload helper functions. */
11533 if (!done_p && icode != CODE_FOR_nothing && memory_p)
11534 {
11535 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
11536 mode);
11537
11538 if (extra_cost >= 0)
11539 {
11540 done_p = true;
11541 ret = NO_REGS;
11542 if (extra_cost > 0)
11543 {
11544 sri->extra_cost = extra_cost;
11545 sri->icode = icode;
11546 }
11547 }
11548 }
11549
11550 /* Handle unaligned loads and stores of integer registers. */
11551 if (!done_p && TARGET_POWERPC64
11552 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11553 && memory_p
11554 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
11555 {
11556 rtx addr = XEXP (x, 0);
11557 rtx off = address_offset (addr);
11558
11559 if (off != NULL_RTX)
11560 {
11561 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11562 unsigned HOST_WIDE_INT offset = INTVAL (off);
11563
11564 /* We need a secondary reload when our legitimate_address_p
11565 says the address is good (as otherwise the entire address
11566 will be reloaded), and the offset is not a multiple of
11567 four or we have an address wrap. Address wrap will only
11568 occur for LO_SUMs since legitimate_offset_address_p
11569 rejects addresses for 16-byte mems that will wrap. */
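	 For example, a 16-byte access (extra = 8) at offset 0x7ff2
	 satisfies offset + 0x8000 < 0x10000 - extra and is misaligned
	 (offset & 3 is nonzero), so it takes the secondary reload below;
	 the same access at offset 0x7ff4 is word-aligned and does not.  */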
11570 if (GET_CODE (addr) == LO_SUM
11571 ? (1 /* legitimate_address_p allows any offset for lo_sum */
11572 && ((offset & 3) != 0
11573 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
11574 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
11575 && (offset & 3) != 0))
11576 {
11577 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
11578 if (in_p)
11579 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
11580 : CODE_FOR_reload_di_load);
11581 else
11582 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
11583 : CODE_FOR_reload_di_store);
11584 sri->extra_cost = 2;
11585 ret = NO_REGS;
11586 done_p = true;
11587 }
11588 else
11589 default_p = true;
11590 }
11591 else
11592 default_p = true;
11593 }
11594
11595 if (!done_p && !TARGET_POWERPC64
11596 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11597 && memory_p
11598 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
11599 {
11600 rtx addr = XEXP (x, 0);
11601 rtx off = address_offset (addr);
11602
11603 if (off != NULL_RTX)
11604 {
11605 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11606 unsigned HOST_WIDE_INT offset = INTVAL (off);
11607
11608 /* We need a secondary reload when our legitimate_address_p
11609 says the address is good (as otherwise the entire address
11610 will be reloaded), and we have a wrap.
11611
11612 legitimate_lo_sum_address_p allows LO_SUM addresses to
11613 have any offset so test for wrap in the low 16 bits.
11614
11615 legitimate_offset_address_p checks for the range
11616 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
11617 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
11618 [0x7ff4,0x7fff] respectively, so test for the
11619 intersection of these ranges, [0x7ffc,0x7fff] and
11620 [0x7ff4,0x7ff7] respectively.
11621
11622 Note that the address we see here may have been
11623 manipulated by legitimize_reload_address. */
11624 if (GET_CODE (addr) == LO_SUM
11625 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
11626 : offset - (0x8000 - extra) < UNITS_PER_WORD)
11627 {
11628 if (in_p)
11629 sri->icode = CODE_FOR_reload_si_load;
11630 else
11631 sri->icode = CODE_FOR_reload_si_store;
11632 sri->extra_cost = 2;
11633 ret = NO_REGS;
11634 done_p = true;
11635 }
11636 else
11637 default_p = true;
11638 }
11639 else
11640 default_p = true;
11641 }
11642
11643 if (!done_p)
11644 default_p = true;
11645
11646 if (default_p)
11647 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
11648
11649 gcc_assert (ret != ALL_REGS);
11650
11651 if (TARGET_DEBUG_ADDR)
11652 {
11653 fprintf (stderr,
11654 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
11655 "mode = %s",
11656 reg_class_names[ret],
11657 in_p ? "true" : "false",
11658 reg_class_names[rclass],
11659 GET_MODE_NAME (mode));
11660
11661 if (reload_completed)
11662 fputs (", after reload", stderr);
11663
11664 if (!done_p)
11665 fputs (", done_p not set", stderr);
11666
11667 if (default_p)
11668 fputs (", default secondary reload", stderr);
11669
11670 if (sri->icode != CODE_FOR_nothing)
11671 fprintf (stderr, ", reload func = %s, extra cost = %d",
11672 insn_data[sri->icode].name, sri->extra_cost);
11673
11674 else if (sri->extra_cost > 0)
11675 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
11676
11677 fputs ("\n", stderr);
11678 debug_rtx (x);
11679 }
11680
11681 return ret;
11682 }
11683
11684 /* Better tracing for rs6000_secondary_reload_inner. */
11685
11686 static void
11687 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
11688 bool store_p)
11689 {
11690 rtx set, clobber;
11691
11692 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
11693
11694 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
11695 store_p ? "store" : "load");
11696
11697 if (store_p)
11698 set = gen_rtx_SET (mem, reg);
11699 else
11700 set = gen_rtx_SET (reg, mem);
11701
11702 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11703 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11704 }
11705
11706 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
11707 ATTRIBUTE_NORETURN;
11708
11709 static void
11710 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
11711 bool store_p)
11712 {
11713 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
11714 gcc_unreachable ();
11715 }
11716
11717 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
11718 reload helper functions. These were identified in
11719 rs6000_secondary_reload_memory, and if reload decided to use the secondary
11720 reload, it calls the insns:
11721 reload_<RELOAD:mode>_<P:mptrsize>_store
11722 reload_<RELOAD:mode>_<P:mptrsize>_load
11723
11724 which in turn calls this function, to do whatever is necessary to create
11725 valid addresses. */
11726
11727 void
11728 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
11729 {
11730 int regno = true_regnum (reg);
11731 machine_mode mode = GET_MODE (reg);
11732 addr_mask_type addr_mask;
11733 rtx addr;
11734 rtx new_addr;
11735 rtx op_reg, op0, op1;
11736 rtx and_op;
11737 rtx cc_clobber;
11738 rtvec rv;
11739
11740 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
11741 || !base_reg_operand (scratch, GET_MODE (scratch)))
11742 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11743
11744 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
11745 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11746
11747 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
11748 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11749
11750 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
11751 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11752
11753 else
11754 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11755
11756 /* Make sure the mode is valid in this register class. */
11757 if ((addr_mask & RELOAD_REG_VALID) == 0)
11758 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11759
11760 if (TARGET_DEBUG_ADDR)
11761 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
11762
11763 new_addr = addr = XEXP (mem, 0);
11764 switch (GET_CODE (addr))
11765 {
11766 /* Does the register class support auto update forms for this mode? If
11767 not, do the update now. We don't need a scratch register, since the
11768 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
11769 case PRE_INC:
11770 case PRE_DEC:
11771 op_reg = XEXP (addr, 0);
11772 if (!base_reg_operand (op_reg, Pmode))
11773 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11774
11775 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11776 {
11777 int delta = GET_MODE_SIZE (mode);
11778 if (GET_CODE (addr) == PRE_DEC)
11779 delta = -delta;
11780 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
11781 new_addr = op_reg;
11782 }
11783 break;
11784
11785 case PRE_MODIFY:
11786 op0 = XEXP (addr, 0);
11787 op1 = XEXP (addr, 1);
11788 if (!base_reg_operand (op0, Pmode)
11789 || GET_CODE (op1) != PLUS
11790 || !rtx_equal_p (op0, XEXP (op1, 0)))
11791 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11792
11793 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11794 {
11795 emit_insn (gen_rtx_SET (op0, op1));
11796 new_addr = reg;
11797 }
11798 break;
11799
11800 /* Do we need to simulate AND -16 to clear the bottom address bits used
11801 in VMX load/stores? */
11802 case AND:
11803 op0 = XEXP (addr, 0);
11804 op1 = XEXP (addr, 1);
11805 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
11806 {
11807 if (REG_P (op0) || SUBREG_P (op0))
11808 op_reg = op0;
11809
11810 else if (GET_CODE (op1) == PLUS)
11811 {
11812 emit_insn (gen_rtx_SET (scratch, op1));
11813 op_reg = scratch;
11814 }
11815
11816 else
11817 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11818
11819 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
11820 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
11821 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
11822 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
11823 new_addr = scratch;
11824 }
11825 break;
11826
11827 /* If this is an indirect address, make sure it is a base register. */
11828 case REG:
11829 case SUBREG:
11830 if (!base_reg_operand (addr, GET_MODE (addr)))
11831 {
11832 emit_insn (gen_rtx_SET (scratch, addr));
11833 new_addr = scratch;
11834 }
11835 break;
11836
11837 /* If this is an indexed address, make sure the register class can handle
11838 indexed addresses for this mode. */
11839 case PLUS:
11840 op0 = XEXP (addr, 0);
11841 op1 = XEXP (addr, 1);
11842 if (!base_reg_operand (op0, Pmode))
11843 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11844
11845 else if (int_reg_operand (op1, Pmode))
11846 {
11847 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11848 {
11849 emit_insn (gen_rtx_SET (scratch, addr));
11850 new_addr = scratch;
11851 }
11852 }
11853
11854 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
11855 {
11856 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
11857 || !quad_address_p (addr, mode, false))
11858 {
11859 emit_insn (gen_rtx_SET (scratch, addr));
11860 new_addr = scratch;
11861 }
11862 }
11863
11864 /* Make sure the register class can handle offset addresses. */
11865 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11866 {
11867 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11868 {
11869 emit_insn (gen_rtx_SET (scratch, addr));
11870 new_addr = scratch;
11871 }
11872 }
11873
11874 else
11875 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11876
11877 break;
11878
11879 case LO_SUM:
11880 op0 = XEXP (addr, 0);
11881 op1 = XEXP (addr, 1);
11882 if (!base_reg_operand (op0, Pmode))
11883 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11884
11885 else if (int_reg_operand (op1, Pmode))
11886 {
11887 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11888 {
11889 emit_insn (gen_rtx_SET (scratch, addr));
11890 new_addr = scratch;
11891 }
11892 }
11893
11894 /* Quad offsets are restricted and can't handle normal addresses. */
11895 else if (mode_supports_dq_form (mode))
11896 {
11897 emit_insn (gen_rtx_SET (scratch, addr));
11898 new_addr = scratch;
11899 }
11900
11901 /* Make sure the register class can handle offset addresses. */
11902 else if (legitimate_lo_sum_address_p (mode, addr, false))
11903 {
11904 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11905 {
11906 emit_insn (gen_rtx_SET (scratch, addr));
11907 new_addr = scratch;
11908 }
11909 }
11910
11911 else
11912 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11913
11914 break;
11915
11916 case SYMBOL_REF:
11917 case CONST:
11918 case LABEL_REF:
11919 rs6000_emit_move (scratch, addr, Pmode);
11920 new_addr = scratch;
11921 break;
11922
11923 default:
11924 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11925 }
11926
11927 /* Adjust the address if it changed. */
11928 if (addr != new_addr)
11929 {
11930 mem = replace_equiv_address_nv (mem, new_addr);
11931 if (TARGET_DEBUG_ADDR)
11932 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
11933 }
11934
11935 /* Now create the move. */
11936 if (store_p)
11937 emit_insn (gen_rtx_SET (mem, reg));
11938 else
11939 emit_insn (gen_rtx_SET (reg, mem));
11940
11941 return;
11942 }
11943
11944 /* Convert reloads involving 64-bit gprs and misaligned offset
11945 addressing, or multiple 32-bit gprs and offsets that are too large,
11946 to use indirect addressing. */
11947
11948 void
11949 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
11950 {
11951 int regno = true_regnum (reg);
11952 enum reg_class rclass;
11953 rtx addr;
11954 rtx scratch_or_premodify = scratch;
11955
11956 if (TARGET_DEBUG_ADDR)
11957 {
11958 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
11959 store_p ? "store" : "load");
11960 fprintf (stderr, "reg:\n");
11961 debug_rtx (reg);
11962 fprintf (stderr, "mem:\n");
11963 debug_rtx (mem);
11964 fprintf (stderr, "scratch:\n");
11965 debug_rtx (scratch);
11966 }
11967
11968 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
11969 gcc_assert (MEM_P (mem));
11970 rclass = REGNO_REG_CLASS (regno);
11971 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
11972 addr = XEXP (mem, 0);
11973
11974 if (GET_CODE (addr) == PRE_MODIFY)
11975 {
11976 gcc_assert (REG_P (XEXP (addr, 0))
11977 && GET_CODE (XEXP (addr, 1)) == PLUS
11978 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
11979 scratch_or_premodify = XEXP (addr, 0);
11980 addr = XEXP (addr, 1);
11981 }
11982 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
11983
11984 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
11985
11986 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
11987
11988 /* Now create the move. */
11989 if (store_p)
11990 emit_insn (gen_rtx_SET (mem, reg));
11991 else
11992 emit_insn (gen_rtx_SET (reg, mem));
11993
11994 return;
11995 }
11996
11997 /* Given an rtx X being reloaded into a reg required to be
11998 in class CLASS, return the class of reg to actually use.
11999 In general this is just CLASS; but on some machines
12000 in some cases it is preferable to use a more restrictive class.
12001
12002 On the RS/6000, we have to return NO_REGS when we want to reload a
12003 floating-point CONST_DOUBLE to force it to be copied to memory.
12004
12005 We also don't want to reload integer values into floating-point
12006 registers if we can at all help it. In fact, this can
12007 cause reload to die, if it tries to generate a reload of CTR
12008 into a FP register and discovers it doesn't have the memory location
12009 required.
12010
12011    ??? Would it be a good idea to have reload do the converse, that is,
12012    try to reload floating modes into FP registers if possible?
12013  */
12014
12015 static enum reg_class
12016 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
12017 {
12018 machine_mode mode = GET_MODE (x);
12019 bool is_constant = CONSTANT_P (x);
12020
12021 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12022 reload class for it. */
12023 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12024 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
12025 return NO_REGS;
12026
12027 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
12028 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
12029 return NO_REGS;
12030
12031 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12032 the reloading of address expressions using PLUS into floating point
12033 registers. */
12034 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
12035 {
12036 if (is_constant)
12037 {
12038 /* Zero is always allowed in all VSX registers. */
12039 if (x == CONST0_RTX (mode))
12040 return rclass;
12041
12042 /* If this is a vector constant that can be formed with a few Altivec
12043 instructions, we want altivec registers. */
12044 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
12045 return ALTIVEC_REGS;
12046
12047 /* If this is an integer constant that can easily be loaded into
12048 vector registers, allow it. */
12049 if (CONST_INT_P (x))
12050 {
12051 HOST_WIDE_INT value = INTVAL (x);
12052
12053 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12054 2.06 can generate it in the Altivec registers with
12055 VSPLTI<x>. */
12056 if (value == -1)
12057 {
12058 if (TARGET_P8_VECTOR)
12059 return rclass;
12060 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12061 return ALTIVEC_REGS;
12062 else
12063 return NO_REGS;
12064 }
12065
12066 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12067 a sign extend in the Altivec registers. */
12068 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
12069 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
12070 return ALTIVEC_REGS;
12071 }
12072
12073 /* Force constant to memory. */
12074 return NO_REGS;
12075 }
12076
12077 /* D-form addressing can easily reload the value. */
12078 if (mode_supports_vmx_dform (mode)
12079 || mode_supports_dq_form (mode))
12080 return rclass;
12081
12082 /* If this is a scalar floating point value and we don't have D-form
12083 addressing, prefer the traditional floating point registers so that we
12084 can use D-form (register+offset) addressing. */
12085 if (rclass == VSX_REGS
12086 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12087 return FLOAT_REGS;
12088
12089 /* Prefer the Altivec registers if Altivec is handling the vector
12090 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12091 loads. */
12092 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12093 || mode == V1TImode)
12094 return ALTIVEC_REGS;
12095
12096 return rclass;
12097 }
12098
12099 if (is_constant || GET_CODE (x) == PLUS)
12100 {
12101 if (reg_class_subset_p (GENERAL_REGS, rclass))
12102 return GENERAL_REGS;
12103 if (reg_class_subset_p (BASE_REGS, rclass))
12104 return BASE_REGS;
12105 return NO_REGS;
12106 }
12107
12108 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
12109 return GENERAL_REGS;
12110
12111 return rclass;
12112 }
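
/* For example, reloading the constant -1 into VSX_REGS returns VSX_REGS
   when ISA 2.07 vector insns are enabled (XXLORC can generate it in any
   VSX register) but ALTIVEC_REGS on ISA 2.06 (only VSPLTI<x> can), while
   an arbitrary constant such as 0x12345678 falls through to NO_REGS and
   is forced to memory.  */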
12113
12114 /* Debug version of rs6000_preferred_reload_class. */
12115 static enum reg_class
12116 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12117 {
12118 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12119
12120 fprintf (stderr,
12121 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12122 "mode = %s, x:\n",
12123 reg_class_names[ret], reg_class_names[rclass],
12124 GET_MODE_NAME (GET_MODE (x)));
12125 debug_rtx (x);
12126
12127 return ret;
12128 }
12129
12130 /* If we are copying between FP or AltiVec registers and anything else, we need
12131    a memory location.  The exception is when we are targeting ppc64 and the
12132    fpr/gpr direct move instructions are available.  Also, under VSX, you
12133 can copy vector registers from the FP register set to the Altivec register
12134 set and vice versa. */
12135
12136 static bool
12137 rs6000_secondary_memory_needed (machine_mode mode,
12138 reg_class_t from_class,
12139 reg_class_t to_class)
12140 {
12141 enum rs6000_reg_type from_type, to_type;
12142 bool altivec_p = ((from_class == ALTIVEC_REGS)
12143 || (to_class == ALTIVEC_REGS));
12144
12145   /* If a simple/direct move is available, we don't need secondary memory.  */
12146 from_type = reg_class_to_reg_type[(int)from_class];
12147 to_type = reg_class_to_reg_type[(int)to_class];
12148
12149 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12150 (secondary_reload_info *)0, altivec_p))
12151 return false;
12152
12153 /* If we have a floating point or vector register class, we need to use
12154 memory to transfer the data. */
12155 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12156 return true;
12157
12158 return false;
12159 }
12160
12161 /* Debug version of rs6000_secondary_memory_needed. */
12162 static bool
12163 rs6000_debug_secondary_memory_needed (machine_mode mode,
12164 reg_class_t from_class,
12165 reg_class_t to_class)
12166 {
12167 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12168
12169 fprintf (stderr,
12170 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12171 "to_class = %s, mode = %s\n",
12172 ret ? "true" : "false",
12173 reg_class_names[from_class],
12174 reg_class_names[to_class],
12175 GET_MODE_NAME (mode));
12176
12177 return ret;
12178 }
12179
12180 /* Return the register class of a scratch register needed to copy IN into
12181 or out of a register in RCLASS in MODE. If it can be done directly,
12182 NO_REGS is returned. */
12183
12184 static enum reg_class
12185 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
12186 rtx in)
12187 {
12188 int regno;
12189
12190 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
12191 #if TARGET_MACHO
12192 && MACHOPIC_INDIRECT
12193 #endif
12194 ))
12195 {
12196 /* We cannot copy a symbolic operand directly into anything
12197 other than BASE_REGS for TARGET_ELF. So indicate that a
12198 register from BASE_REGS is needed as an intermediate
12199 register.
12200
12201 On Darwin, pic addresses require a load from memory, which
12202 needs a base register. */
12203 if (rclass != BASE_REGS
12204 && (SYMBOL_REF_P (in)
12205 || GET_CODE (in) == HIGH
12206 || GET_CODE (in) == LABEL_REF
12207 || GET_CODE (in) == CONST))
12208 return BASE_REGS;
12209 }
12210
12211 if (REG_P (in))
12212 {
12213 regno = REGNO (in);
12214 if (!HARD_REGISTER_NUM_P (regno))
12215 {
12216 regno = true_regnum (in);
12217 if (!HARD_REGISTER_NUM_P (regno))
12218 regno = -1;
12219 }
12220 }
12221 else if (SUBREG_P (in))
12222 {
12223 regno = true_regnum (in);
12224 if (!HARD_REGISTER_NUM_P (regno))
12225 regno = -1;
12226 }
12227 else
12228 regno = -1;
12229
12230 /* If we have VSX register moves, prefer moving scalar values between
12231 Altivec registers and GPR by going via an FPR (and then via memory)
12232 instead of reloading the secondary memory address for Altivec moves. */
12233 if (TARGET_VSX
12234 && GET_MODE_SIZE (mode) < 16
12235 && !mode_supports_vmx_dform (mode)
12236 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
12237 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
12238 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12239 && (regno >= 0 && INT_REGNO_P (regno)))))
12240 return FLOAT_REGS;
12241
12242 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12243 into anything. */
12244 if (rclass == GENERAL_REGS || rclass == BASE_REGS
12245 || (regno >= 0 && INT_REGNO_P (regno)))
12246 return NO_REGS;
12247
12248 /* Constants, memory, and VSX registers can go into VSX registers (both the
12249 traditional floating point and the altivec registers). */
12250 if (rclass == VSX_REGS
12251 && (regno == -1 || VSX_REGNO_P (regno)))
12252 return NO_REGS;
12253
12254 /* Constants, memory, and FP registers can go into FP registers. */
12255 if ((regno == -1 || FP_REGNO_P (regno))
12256 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
12257 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
12258
12259 /* Memory and AltiVec registers can go into AltiVec registers. */
12260 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
12261 && rclass == ALTIVEC_REGS)
12262 return NO_REGS;
12263
12264 /* We can copy among the CR registers. */
12265 if ((rclass == CR_REGS || rclass == CR0_REGS)
12266 && regno >= 0 && CR_REGNO_P (regno))
12267 return NO_REGS;
12268
12269 /* Otherwise, we need GENERAL_REGS. */
12270 return GENERAL_REGS;
12271 }
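
/* For example (illustrative, assuming 32-bit ELF): copying the address
   of a global symbol into a floating-point register cannot be done
   directly; the SYMBOL_REF case above returns BASE_REGS, so reload
   first materializes the address in a base register and only then
   moves it to its destination. */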
12272
12273 /* Debug version of rs6000_secondary_reload_class. */
12274 static enum reg_class
12275 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12276 machine_mode mode, rtx in)
12277 {
12278 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12279 fprintf (stderr,
12280 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12281 "mode = %s, input rtx:\n",
12282 reg_class_names[ret], reg_class_names[rclass],
12283 GET_MODE_NAME (mode));
12284 debug_rtx (in);
12285
12286 return ret;
12287 }
12288
12289 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12290
12291 static bool
12292 rs6000_can_change_mode_class (machine_mode from,
12293 machine_mode to,
12294 reg_class_t rclass)
12295 {
12296 unsigned from_size = GET_MODE_SIZE (from);
12297 unsigned to_size = GET_MODE_SIZE (to);
12298
12299 if (from_size != to_size)
12300 {
12301 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
12302
12303 if (reg_classes_intersect_p (xclass, rclass))
12304 {
12305 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
12306 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
12307 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
12308 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
12309
12310 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12311    single register under VSX because the scalar part of the register
12312    is in the upper 64 bits, and not the lower 64 bits. Types like
12313    TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
12314    IEEE floating point can't overlap, and neither can small
12315    values. */
12316
12317 if (to_float128_vector_p && from_float128_vector_p)
12318 return true;
12319
12320 else if (to_float128_vector_p || from_float128_vector_p)
12321 return false;
12322
12323 /* TDmode in floating-mode registers must always go into a register
12324 pair with the most significant word in the even-numbered register
12325 to match ISA requirements. In little-endian mode, this does not
12326 match subreg numbering, so we cannot allow subregs. */
12327 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
12328 return false;
12329
12330 /* Allow SD<->DD changes, since SDmode values are stored in
12331 the low half of the DDmode, just like target-independent
12332 code expects. We need to allow at least SD->DD since
12333 rs6000_secondary_memory_needed_mode asks for that change
12334 to be made for SD reloads. */
12335 if ((to == DDmode && from == SDmode)
12336 || (to == SDmode && from == DDmode))
12337 return true;
12338
12339 if (from_size < 8 || to_size < 8)
12340 return false;
12341
12342 if (from_size == 8 && (8 * to_nregs) != to_size)
12343 return false;
12344
12345 if (to_size == 8 && (8 * from_nregs) != from_size)
12346 return false;
12347
12348 return true;
12349 }
12350 else
12351 return true;
12352 }
12353
12354 /* Since the VSX register set includes traditional floating point registers
12355 and altivec registers, just check for the size being different instead of
12356 trying to check whether the modes are vector modes. Otherwise it won't
12357 allow say DF and DI to change classes. For types like TFmode and TDmode
12358 that take 2 64-bit registers, rather than a single 128-bit register, don't
12359 allow subregs of those types to other 128 bit types. */
12360 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
12361 {
12362 unsigned num_regs = (from_size + 15) / 16;
12363 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
12364 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
12365 return false;
12366
12367 return (from_size == 8 || from_size == 16);
12368 }
12369
12370 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
12371 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
12372 return false;
12373
12374 return true;
12375 }
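
/* Worked example (illustrative): under VSX, a DFmode <-> DImode change
   is allowed -- both are 8 bytes and each occupies a single register,
   so the final test above succeeds. By contrast, DFmode <-> KFmode is
   rejected: KFmode is a 128-bit IEEE value held in one VSX register
   with its scalar part in the upper 64 bits, so only one side is
   FLOAT128_VECTOR_P and the function returns false. */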
12376
12377 /* Debug version of rs6000_can_change_mode_class. */
12378 static bool
12379 rs6000_debug_can_change_mode_class (machine_mode from,
12380 machine_mode to,
12381 reg_class_t rclass)
12382 {
12383 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12384
12385 fprintf (stderr,
12386 "rs6000_can_change_mode_class, return %s, from = %s, "
12387 "to = %s, rclass = %s\n",
12388 ret ? "true" : "false",
12389 GET_MODE_NAME (from), GET_MODE_NAME (to),
12390 reg_class_names[rclass]);
12391
12392 return ret;
12393 }
12394 \f
12395 /* Return a string to do a move operation of 128 bits of data. */
12396
12397 const char *
12398 rs6000_output_move_128bit (rtx operands[])
12399 {
12400 rtx dest = operands[0];
12401 rtx src = operands[1];
12402 machine_mode mode = GET_MODE (dest);
12403 int dest_regno;
12404 int src_regno;
12405 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
12406 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
12407
12408 if (REG_P (dest))
12409 {
12410 dest_regno = REGNO (dest);
12411 dest_gpr_p = INT_REGNO_P (dest_regno);
12412 dest_fp_p = FP_REGNO_P (dest_regno);
12413 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
12414 dest_vsx_p = dest_fp_p | dest_vmx_p;
12415 }
12416 else
12417 {
12418 dest_regno = -1;
12419 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
12420 }
12421
12422 if (REG_P (src))
12423 {
12424 src_regno = REGNO (src);
12425 src_gpr_p = INT_REGNO_P (src_regno);
12426 src_fp_p = FP_REGNO_P (src_regno);
12427 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
12428 src_vsx_p = src_fp_p | src_vmx_p;
12429 }
12430 else
12431 {
12432 src_regno = -1;
12433 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
12434 }
12435
12436 /* Register moves. */
12437 if (dest_regno >= 0 && src_regno >= 0)
12438 {
12439 if (dest_gpr_p)
12440 {
12441 if (src_gpr_p)
12442 return "#";
12443
12444 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
12445 return (WORDS_BIG_ENDIAN
12446 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12447 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12448
12449 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
12450 return "#";
12451 }
12452
12453 else if (TARGET_VSX && dest_vsx_p)
12454 {
12455 if (src_vsx_p)
12456 return "xxlor %x0,%x1,%x1";
12457
12458 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
12459 return (WORDS_BIG_ENDIAN
12460 ? "mtvsrdd %x0,%1,%L1"
12461 : "mtvsrdd %x0,%L1,%1");
12462
12463 else if (TARGET_DIRECT_MOVE && src_gpr_p)
12464 return "#";
12465 }
12466
12467 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
12468 return "vor %0,%1,%1";
12469
12470 else if (dest_fp_p && src_fp_p)
12471 return "#";
12472 }
12473
12474 /* Loads. */
12475 else if (dest_regno >= 0 && MEM_P (src))
12476 {
12477 if (dest_gpr_p)
12478 {
12479 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12480 return "lq %0,%1";
12481 else
12482 return "#";
12483 }
12484
12485 else if (TARGET_ALTIVEC && dest_vmx_p
12486 && altivec_indexed_or_indirect_operand (src, mode))
12487 return "lvx %0,%y1";
12488
12489 else if (TARGET_VSX && dest_vsx_p)
12490 {
12491 if (mode_supports_dq_form (mode)
12492 && quad_address_p (XEXP (src, 0), mode, true))
12493 return "lxv %x0,%1";
12494
12495 else if (TARGET_P9_VECTOR)
12496 return "lxvx %x0,%y1";
12497
12498 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12499 return "lxvw4x %x0,%y1";
12500
12501 else
12502 return "lxvd2x %x0,%y1";
12503 }
12504
12505 else if (TARGET_ALTIVEC && dest_vmx_p)
12506 return "lvx %0,%y1";
12507
12508 else if (dest_fp_p)
12509 return "#";
12510 }
12511
12512 /* Stores. */
12513 else if (src_regno >= 0 && MEM_P (dest))
12514 {
12515 if (src_gpr_p)
12516 {
12517 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12518 return "stq %1,%0";
12519 else
12520 return "#";
12521 }
12522
12523 else if (TARGET_ALTIVEC && src_vmx_p
12524 && altivec_indexed_or_indirect_operand (dest, mode))
12525 return "stvx %1,%y0";
12526
12527 else if (TARGET_VSX && src_vsx_p)
12528 {
12529 if (mode_supports_dq_form (mode)
12530 && quad_address_p (XEXP (dest, 0), mode, true))
12531 return "stxv %x1,%0";
12532
12533 else if (TARGET_P9_VECTOR)
12534 return "stxvx %x1,%y0";
12535
12536 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12537 return "stxvw4x %x1,%y0";
12538
12539 else
12540 return "stxvd2x %x1,%y0";
12541 }
12542
12543 else if (TARGET_ALTIVEC && src_vmx_p)
12544 return "stvx %1,%y0";
12545
12546 else if (src_fp_p)
12547 return "#";
12548 }
12549
12550 /* Constants. */
12551 else if (dest_regno >= 0
12552 && (CONST_INT_P (src)
12553 || CONST_WIDE_INT_P (src)
12554 || CONST_DOUBLE_P (src)
12555 || GET_CODE (src) == CONST_VECTOR))
12556 {
12557 if (dest_gpr_p)
12558 return "#";
12559
12560 else if ((dest_vmx_p && TARGET_ALTIVEC)
12561 || (dest_vsx_p && TARGET_VSX))
12562 return output_vec_const_move (operands);
12563 }
12564
12565 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
12566 }
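
/* For example (illustrative), the function above returns
   "xxlor %x0,%x1,%x1" for a VSX register-to-register copy,
   "lq %0,%1" for a GPR load when quad memory is available, and "#"
   when the move must be split after reload (see
   rs6000_split_128bit_ok_p below). */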
12567
12568 /* Validate a 128-bit move. */
12569 bool
12570 rs6000_move_128bit_ok_p (rtx operands[])
12571 {
12572 machine_mode mode = GET_MODE (operands[0]);
12573 return (gpc_reg_operand (operands[0], mode)
12574 || gpc_reg_operand (operands[1], mode));
12575 }
12576
12577 /* Return true if a 128-bit move needs to be split. */
12578 bool
12579 rs6000_split_128bit_ok_p (rtx operands[])
12580 {
12581 if (!reload_completed)
12582 return false;
12583
12584 if (!gpr_or_gpr_p (operands[0], operands[1]))
12585 return false;
12586
12587 if (quad_load_store_p (operands[0], operands[1]))
12588 return false;
12589
12590 return true;
12591 }
12592
12593 \f
12594 /* Given a comparison operation, return the bit number in CCR to test. We
12595 know this is a valid comparison.
12596
12597 SCC_P is 1 if this is for an scc. That means that %D will have been
12598 used instead of %C, so the bits will be in different places.
12599
12600 Return -1 if OP isn't a valid comparison for some reason. */
12601
12602 int
12603 ccr_bit (rtx op, int scc_p)
12604 {
12605 enum rtx_code code = GET_CODE (op);
12606 machine_mode cc_mode;
12607 int cc_regnum;
12608 int base_bit;
12609 rtx reg;
12610
12611 if (!COMPARISON_P (op))
12612 return -1;
12613
12614 reg = XEXP (op, 0);
12615
12616 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
12617 return -1;
12618
12619 cc_mode = GET_MODE (reg);
12620 cc_regnum = REGNO (reg);
12621 base_bit = 4 * (cc_regnum - CR0_REGNO);
12622
12623 validate_condition_mode (code, cc_mode);
12624
12625 /* When generating a sCOND operation, only positive conditions are
12626 allowed. */
12627 if (scc_p)
12628 switch (code)
12629 {
12630 case EQ:
12631 case GT:
12632 case LT:
12633 case UNORDERED:
12634 case GTU:
12635 case LTU:
12636 break;
12637 default:
12638 return -1;
12639 }
12640
12641 switch (code)
12642 {
12643 case NE:
12644 return scc_p ? base_bit + 3 : base_bit + 2;
12645 case EQ:
12646 return base_bit + 2;
12647 case GT: case GTU: case UNLE:
12648 return base_bit + 1;
12649 case LT: case LTU: case UNGE:
12650 return base_bit;
12651 case ORDERED: case UNORDERED:
12652 return base_bit + 3;
12653
12654 case GE: case GEU:
12655 /* If scc, we will have done a cror to put the bit in the
12656 unordered position. So test that bit. For integer, this is ! LT
12657 unless this is an scc insn. */
12658 return scc_p ? base_bit + 3 : base_bit;
12659
12660 case LE: case LEU:
12661 return scc_p ? base_bit + 3 : base_bit + 1;
12662
12663 default:
12664 return -1;
12665 }
12666 }
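
/* An illustrative sketch (not part of the build) of the CR bit
   arithmetic used above: CR field N occupies bits 4*N .. 4*N+3,
   ordered LT, GT, EQ, SO/UNORDERED, so e.g. the EQ bit of CR3 is
   4*3 + 2 = 14. The helper name is hypothetical. */
#if 0
static int
example_ccr_bit (int cr_field, int bit_in_field)
{
  /* Bits within a field: LT = 0, GT = 1, EQ = 2, SO/UN = 3. */
  return 4 * cr_field + bit_in_field;
}
#endif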
12667 \f
12668 /* Return the GOT register. */
12669
12670 rtx
12671 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
12672 {
12673 /* The second flow pass currently (June 1999) can't update
12674 regs_ever_live without disturbing other parts of the compiler, so
12675 update it here to make the prolog/epilogue code happy. */
12676 if (!can_create_pseudo_p ()
12677 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
12678 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
12679
12680 crtl->uses_pic_offset_table = 1;
12681
12682 return pic_offset_table_rtx;
12683 }
12684 \f
12685 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
12686
12687 /* Write out a function code label. */
12688
12689 void
12690 rs6000_output_function_entry (FILE *file, const char *fname)
12691 {
12692 if (fname[0] != '.')
12693 {
12694 switch (DEFAULT_ABI)
12695 {
12696 default:
12697 gcc_unreachable ();
12698
12699 case ABI_AIX:
12700 if (DOT_SYMBOLS)
12701 putc ('.', file);
12702 else
12703 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
12704 break;
12705
12706 case ABI_ELFv2:
12707 case ABI_V4:
12708 case ABI_DARWIN:
12709 break;
12710 }
12711 }
12712
12713 RS6000_OUTPUT_BASENAME (file, fname);
12714 }
12715
12716 /* Print an operand. Recognize special options, documented below. */
12717
12718 #if TARGET_ELF
12719 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
12720 only introduced by the linker, when applying the sda21
12721 relocation. */
12722 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
12723 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
12724 #else
12725 #define SMALL_DATA_RELOC "sda21"
12726 #define SMALL_DATA_REG 0
12727 #endif
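
/* For example (illustrative): with -msdata=eabi a small-data reference
   prints as "var@sda21(0)", while the sysv small-data convention
   prints "var@sdarel(13)", using r13 as the small-data base. */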
12728
12729 void
12730 print_operand (FILE *file, rtx x, int code)
12731 {
12732 int i;
12733 unsigned HOST_WIDE_INT uval;
12734
12735 switch (code)
12736 {
12737 /* %a is output_address. */
12738
12739 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
12740 output_operand. */
12741
12742 case 'D':
12743 /* Like 'J' but get to the GT bit only. */
12744 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12745 {
12746 output_operand_lossage ("invalid %%D value");
12747 return;
12748 }
12749
12750 /* Bit 1 is GT bit. */
12751 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
12752
12753 /* Add one for shift count in rlinm for scc. */
12754 fprintf (file, "%d", i + 1);
12755 return;
12756
12757 case 'e':
12758 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
12759 if (! INT_P (x))
12760 {
12761 output_operand_lossage ("invalid %%e value");
12762 return;
12763 }
12764
12765 uval = INTVAL (x);
12766 if ((uval & 0xffff) == 0 && uval != 0)
12767 putc ('s', file);
12768 return;
12769
12770 case 'E':
12771 /* X is a CR register. Print the number of the EQ bit of the CR. */
12772 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12773 output_operand_lossage ("invalid %%E value");
12774 else
12775 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
12776 return;
12777
12778 case 'f':
12779 /* X is a CR register. Print the shift count needed to move it
12780 to the high-order four bits. */
12781 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12782 output_operand_lossage ("invalid %%f value");
12783 else
12784 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
12785 return;
12786
12787 case 'F':
12788 /* Similar, but print the count for the rotate in the opposite
12789 direction. */
12790 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12791 output_operand_lossage ("invalid %%F value");
12792 else
12793 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
12794 return;
12795
12796 case 'G':
12797 /* X is a constant integer. If it is negative, print "m",
12798 otherwise print "z". This is to make an aze or ame insn. */
12799 if (!CONST_INT_P (x))
12800 output_operand_lossage ("invalid %%G value");
12801 else if (INTVAL (x) >= 0)
12802 putc ('z', file);
12803 else
12804 putc ('m', file);
12805 return;
12806
12807 case 'h':
12808 /* If constant, output low-order five bits. Otherwise, write
12809 normally. */
12810 if (INT_P (x))
12811 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
12812 else
12813 print_operand (file, x, 0);
12814 return;
12815
12816 case 'H':
12817 /* If constant, output low-order six bits. Otherwise, write
12818 normally. */
12819 if (INT_P (x))
12820 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
12821 else
12822 print_operand (file, x, 0);
12823 return;
12824
12825 case 'I':
12826 /* Print `i' if this is a constant, else nothing. */
12827 if (INT_P (x))
12828 putc ('i', file);
12829 return;
12830
12831 case 'j':
12832 /* Write the bit number in CCR for jump. */
12833 i = ccr_bit (x, 0);
12834 if (i == -1)
12835 output_operand_lossage ("invalid %%j code");
12836 else
12837 fprintf (file, "%d", i);
12838 return;
12839
12840 case 'J':
12841 /* Similar, but add one for shift count in rlinm for scc and pass
12842 scc flag to `ccr_bit'. */
12843 i = ccr_bit (x, 1);
12844 if (i == -1)
12845 output_operand_lossage ("invalid %%J code");
12846 else
12847 /* If we want bit 31, write a shift count of zero, not 32. */
12848 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12849 return;
12850
12851 case 'k':
12852 /* X must be a constant. Write the 1's complement of the
12853 constant. */
12854 if (! INT_P (x))
12855 output_operand_lossage ("invalid %%k value");
12856 else
12857 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
12858 return;
12859
12860 case 'K':
12861 /* X must be a symbolic constant on ELF. Write an
12862 expression suitable for an 'addi' that adds in the low 16
12863 bits of the MEM. */
12864 if (GET_CODE (x) == CONST)
12865 {
12866 if (GET_CODE (XEXP (x, 0)) != PLUS
12867 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
12868 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
12869 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
12870 output_operand_lossage ("invalid %%K value");
12871 }
12872 print_operand_address (file, x);
12873 fputs ("@l", file);
12874 return;
12875
12876 /* %l is output_asm_label. */
12877
12878 case 'L':
12879 /* Write second word of DImode or DFmode reference. Works on register
12880 or non-indexed memory only. */
12881 if (REG_P (x))
12882 fputs (reg_names[REGNO (x) + 1], file);
12883 else if (MEM_P (x))
12884 {
12885 machine_mode mode = GET_MODE (x);
12886 /* Handle possible auto-increment. Since it is pre-increment and
12887    we have already done it, we can just use an offset of one word. */
12888 if (GET_CODE (XEXP (x, 0)) == PRE_INC
12889 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
12890 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12891 UNITS_PER_WORD));
12892 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
12893 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12894 UNITS_PER_WORD));
12895 else
12896 output_address (mode, XEXP (adjust_address_nv (x, SImode,
12897 UNITS_PER_WORD),
12898 0));
12899
12900 if (small_data_operand (x, GET_MODE (x)))
12901 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
12902 reg_names[SMALL_DATA_REG]);
12903 }
12904 return;
12905
12906 case 'N': /* Unused */
12907 /* Write the number of elements in the vector times 4. */
12908 if (GET_CODE (x) != PARALLEL)
12909 output_operand_lossage ("invalid %%N value");
12910 else
12911 fprintf (file, "%d", XVECLEN (x, 0) * 4);
12912 return;
12913
12914 case 'O': /* Unused */
12915 /* Similar, but subtract 1 first. */
12916 if (GET_CODE (x) != PARALLEL)
12917 output_operand_lossage ("invalid %%O value");
12918 else
12919 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
12920 return;
12921
12922 case 'p':
12923 /* X is a CONST_INT that is a power of two. Output the logarithm. */
12924 if (! INT_P (x)
12925 || INTVAL (x) < 0
12926 || (i = exact_log2 (INTVAL (x))) < 0)
12927 output_operand_lossage ("invalid %%p value");
12928 else
12929 fprintf (file, "%d", i);
12930 return;
12931
12932 case 'P':
12933 /* The operand must be an indirect memory reference. The result
12934 is the register name. */
12935 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
12936 || REGNO (XEXP (x, 0)) >= 32)
12937 output_operand_lossage ("invalid %%P value");
12938 else
12939 fputs (reg_names[REGNO (XEXP (x, 0))], file);
12940 return;
12941
12942 case 'q':
12943 /* This outputs the logical code corresponding to a boolean
12944 expression. The expression may have one or both operands
12945 negated (if one, only the first one). For condition register
12946 logical operations, it will also treat the negated
12947 CR codes as NOTs, but not handle NOTs of them. */
12948 {
12949 const char *const *t = 0;
12950 const char *s;
12951 enum rtx_code code = GET_CODE (x);
12952 static const char * const tbl[3][3] = {
12953 { "and", "andc", "nor" },
12954 { "or", "orc", "nand" },
12955 { "xor", "eqv", "xor" } };
12956
12957 if (code == AND)
12958 t = tbl[0];
12959 else if (code == IOR)
12960 t = tbl[1];
12961 else if (code == XOR)
12962 t = tbl[2];
12963 else
12964 output_operand_lossage ("invalid %%q value");
12965
12966 if (GET_CODE (XEXP (x, 0)) != NOT)
12967 s = t[0];
12968 else
12969 {
12970 if (GET_CODE (XEXP (x, 1)) == NOT)
12971 s = t[2];
12972 else
12973 s = t[1];
12974 }
12975
12976 fputs (s, file);
12977 }
12978 return;
12979
12980 case 'Q':
12981 if (! TARGET_MFCRF)
12982 return;
12983 fputc (',', file);
12984 /* FALLTHRU */
12985
12986 case 'R':
12987 /* X is a CR register. Print the mask for `mtcrf'. */
12988 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12989 output_operand_lossage ("invalid %%R value");
12990 else
12991 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
12992 return;
12993
12994 case 's':
12995 /* Low 5 bits of 32 - value. */
12996 if (! INT_P (x))
12997 output_operand_lossage ("invalid %%s value");
12998 else
12999 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
13000 return;
13001
13002 case 't':
13003 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13004 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13005 {
13006 output_operand_lossage ("invalid %%t value");
13007 return;
13008 }
13009
13010 /* Bit 3 is OV bit. */
13011 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
13012
13013 /* If we want bit 31, write a shift count of zero, not 32. */
13014 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13015 return;
13016
13017 case 'T':
13018 /* Print the symbolic name of a branch target register. */
13019 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13020 x = XVECEXP (x, 0, 0);
13021 if (!REG_P (x) || (REGNO (x) != LR_REGNO
13022 && REGNO (x) != CTR_REGNO))
13023 output_operand_lossage ("invalid %%T value");
13024 else if (REGNO (x) == LR_REGNO)
13025 fputs ("lr", file);
13026 else
13027 fputs ("ctr", file);
13028 return;
13029
13030 case 'u':
13031 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13032 for use in unsigned operand. */
13033 if (! INT_P (x))
13034 {
13035 output_operand_lossage ("invalid %%u value");
13036 return;
13037 }
13038
13039 uval = INTVAL (x);
13040 if ((uval & 0xffff) == 0)
13041 uval >>= 16;
13042
13043 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
13044 return;
13045
13046 case 'v':
13047 /* High-order 16 bits of constant for use in signed operand. */
13048 if (! INT_P (x))
13049 output_operand_lossage ("invalid %%v value");
13050 else
13051 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
13052 (INTVAL (x) >> 16) & 0xffff);
13053 return;
13054
13055 case 'U':
13056 /* Print `u' if this has an auto-increment or auto-decrement. */
13057 if (MEM_P (x)
13058 && (GET_CODE (XEXP (x, 0)) == PRE_INC
13059 || GET_CODE (XEXP (x, 0)) == PRE_DEC
13060 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
13061 putc ('u', file);
13062 return;
13063
13064 case 'V':
13065 /* Print the trap code for this operand. */
13066 switch (GET_CODE (x))
13067 {
13068 case EQ:
13069 fputs ("eq", file); /* 4 */
13070 break;
13071 case NE:
13072 fputs ("ne", file); /* 24 */
13073 break;
13074 case LT:
13075 fputs ("lt", file); /* 16 */
13076 break;
13077 case LE:
13078 fputs ("le", file); /* 20 */
13079 break;
13080 case GT:
13081 fputs ("gt", file); /* 8 */
13082 break;
13083 case GE:
13084 fputs ("ge", file); /* 12 */
13085 break;
13086 case LTU:
13087 fputs ("llt", file); /* 2 */
13088 break;
13089 case LEU:
13090 fputs ("lle", file); /* 6 */
13091 break;
13092 case GTU:
13093 fputs ("lgt", file); /* 1 */
13094 break;
13095 case GEU:
13096 fputs ("lge", file); /* 5 */
13097 break;
13098 default:
13099 output_operand_lossage ("invalid %%V value");
13100 }
13101 break;
13102
13103 case 'w':
13104 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13105 normally. */
13106 if (INT_P (x))
13107 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13108 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13109 else
13110 print_operand (file, x, 0);
13111 return;
13112
13113 case 'x':
13114 /* X is a FPR or Altivec register used in a VSX context. */
13115 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13116 output_operand_lossage ("invalid %%x value");
13117 else
13118 {
13119 int reg = REGNO (x);
13120 int vsx_reg = (FP_REGNO_P (reg)
13121 ? reg - 32
13122 : reg - FIRST_ALTIVEC_REGNO + 32);
13123
13124 #ifdef TARGET_REGNAMES
13125 if (TARGET_REGNAMES)
13126 fprintf (file, "%%vs%d", vsx_reg);
13127 else
13128 #endif
13129 fprintf (file, "%d", vsx_reg);
13130 }
13131 return;
13132
13133 case 'X':
13134 if (MEM_P (x)
13135 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13136 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13137 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13138 putc ('x', file);
13139 return;
13140
13141 case 'Y':
13142 /* Like 'L', for third word of TImode/PTImode. */
13143 if (REG_P (x))
13144 fputs (reg_names[REGNO (x) + 2], file);
13145 else if (MEM_P (x))
13146 {
13147 machine_mode mode = GET_MODE (x);
13148 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13149 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13150 output_address (mode, plus_constant (Pmode,
13151 XEXP (XEXP (x, 0), 0), 8));
13152 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13153 output_address (mode, plus_constant (Pmode,
13154 XEXP (XEXP (x, 0), 0), 8));
13155 else
13156 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13157 if (small_data_operand (x, GET_MODE (x)))
13158 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13159 reg_names[SMALL_DATA_REG]);
13160 }
13161 return;
13162
13163 case 'z':
13164 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13165 x = XVECEXP (x, 0, 1);
13166 /* X is a SYMBOL_REF. Write out the name preceded by a
13167 period and without any trailing data in brackets. Used for function
13168 names. If we are configured for System V (or the embedded ABI) on
13169 the PowerPC, do not emit the period, since those systems do not use
13170 TOCs and the like. */
13171 if (!SYMBOL_REF_P (x))
13172 {
13173 output_operand_lossage ("invalid %%z value");
13174 return;
13175 }
13176
13177 /* For macho, check to see if we need a stub. */
13178 if (TARGET_MACHO)
13179 {
13180 const char *name = XSTR (x, 0);
13181 #if TARGET_MACHO
13182 if (darwin_symbol_stubs
13183 && MACHOPIC_INDIRECT
13184 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13185 name = machopic_indirection_name (x, /*stub_p=*/true);
13186 #endif
13187 assemble_name (file, name);
13188 }
13189 else if (!DOT_SYMBOLS)
13190 assemble_name (file, XSTR (x, 0));
13191 else
13192 rs6000_output_function_entry (file, XSTR (x, 0));
13193 return;
13194
13195 case 'Z':
13196 /* Like 'L', for last word of TImode/PTImode. */
13197 if (REG_P (x))
13198 fputs (reg_names[REGNO (x) + 3], file);
13199 else if (MEM_P (x))
13200 {
13201 machine_mode mode = GET_MODE (x);
13202 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13203 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13204 output_address (mode, plus_constant (Pmode,
13205 XEXP (XEXP (x, 0), 0), 12));
13206 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13207 output_address (mode, plus_constant (Pmode,
13208 XEXP (XEXP (x, 0), 0), 12));
13209 else
13210 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13211 if (small_data_operand (x, GET_MODE (x)))
13212 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13213 reg_names[SMALL_DATA_REG]);
13214 }
13215 return;
13216
13217 /* Print AltiVec memory operand. */
13218 case 'y':
13219 {
13220 rtx tmp;
13221
13222 gcc_assert (MEM_P (x));
13223
13224 tmp = XEXP (x, 0);
13225
13226 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13227 && GET_CODE (tmp) == AND
13228 && CONST_INT_P (XEXP (tmp, 1))
13229 && INTVAL (XEXP (tmp, 1)) == -16)
13230 tmp = XEXP (tmp, 0);
13231 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13232 && GET_CODE (tmp) == PRE_MODIFY)
13233 tmp = XEXP (tmp, 1);
13234 if (REG_P (tmp))
13235 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13236 else
13237 {
13238 if (GET_CODE (tmp) != PLUS
13239 || !REG_P (XEXP (tmp, 0))
13240 || !REG_P (XEXP (tmp, 1)))
13241 {
13242 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13243 break;
13244 }
13245
13246 if (REGNO (XEXP (tmp, 0)) == 0)
13247 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13248 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13249 else
13250 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13251 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13252 }
13253 break;
13254 }
13255
13256 case 0:
13257 if (REG_P (x))
13258 fprintf (file, "%s", reg_names[REGNO (x)]);
13259 else if (MEM_P (x))
13260 {
13261 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13262 know the width from the mode. */
13263 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13264 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13265 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13266 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13267 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13268 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13269 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13270 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13271 else
13272 output_address (GET_MODE (x), XEXP (x, 0));
13273 }
13274 else if (toc_relative_expr_p (x, false,
13275 &tocrel_base_oac, &tocrel_offset_oac))
13276 /* This hack along with a corresponding hack in
13277 rs6000_output_addr_const_extra arranges to output addends
13278 where the assembler expects to find them. eg.
13279 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13280 without this hack would be output as "x@toc+4". We
13281 want "x+4@toc". */
13282 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13283 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13284 output_addr_const (file, XVECEXP (x, 0, 0));
13285 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13286 output_addr_const (file, XVECEXP (x, 0, 1));
13287 else
13288 output_addr_const (file, x);
13289 return;
13290
13291 case '&':
13292 if (const char *name = get_some_local_dynamic_name ())
13293 assemble_name (file, name);
13294 else
13295 output_operand_lossage ("'%%&' used without any "
13296 "local dynamic TLS references");
13297 return;
13298
13299 default:
13300 output_operand_lossage ("invalid %%xn code");
13301 }
13302 }
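
/* An illustrative sketch (not part of the build) of the '%u' selection
   rule above: print the low-order 16 bits of the constant unless they
   are all zero, in which case print the high-order 16 bits instead.
   The helper name is hypothetical. */
#if 0
static unsigned int
example_u_bits (unsigned long long uval)
{
  if ((uval & 0xffff) == 0)
    uval >>= 16;
  return (unsigned int) (uval & 0xffff);
}
#endif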
13303 \f
13304 /* Print the address of an operand. */
13305
13306 void
13307 print_operand_address (FILE *file, rtx x)
13308 {
13309 if (REG_P (x))
13310 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13311
13312 /* Is it a PC-relative address? */
13313 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13314 {
13315 HOST_WIDE_INT offset;
13316
13317 if (GET_CODE (x) == CONST)
13318 x = XEXP (x, 0);
13319
13320 if (GET_CODE (x) == PLUS)
13321 {
13322 offset = INTVAL (XEXP (x, 1));
13323 x = XEXP (x, 0);
13324 }
13325 else
13326 offset = 0;
13327
13328 output_addr_const (file, x);
13329
13330 if (offset)
13331 fprintf (file, "%+" PRId64, offset);
13332
13333 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13334 fprintf (file, "@got");
13335
13336 fprintf (file, "@pcrel");
13337 }
13338 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13339 || GET_CODE (x) == LABEL_REF)
13340 {
13341 output_addr_const (file, x);
13342 if (small_data_operand (x, GET_MODE (x)))
13343 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13344 reg_names[SMALL_DATA_REG]);
13345 else
13346 gcc_assert (!TARGET_TOC);
13347 }
13348 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13349 && REG_P (XEXP (x, 1)))
13350 {
13351 if (REGNO (XEXP (x, 0)) == 0)
13352 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13353 reg_names[ REGNO (XEXP (x, 0)) ]);
13354 else
13355 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13356 reg_names[ REGNO (XEXP (x, 1)) ]);
13357 }
13358 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13359 && CONST_INT_P (XEXP (x, 1)))
13360 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13361 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13362 #if TARGET_MACHO
13363 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13364 && CONSTANT_P (XEXP (x, 1)))
13365 {
13366 fprintf (file, "lo16(");
13367 output_addr_const (file, XEXP (x, 1));
13368 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13369 }
13370 #endif
13371 #if TARGET_ELF
13372 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13373 && CONSTANT_P (XEXP (x, 1)))
13374 {
13375 output_addr_const (file, XEXP (x, 1));
13376 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13377 }
13378 #endif
13379 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13380 {
13381 /* This hack along with a corresponding hack in
13382 rs6000_output_addr_const_extra arranges to output addends
13383 where the assembler expects to find them. eg.
13384 (lo_sum (reg 9)
13385 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13386 without this hack would be output as "x@toc+8@l(9)". We
13387 want "x+8@toc@l(9)". */
13388 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13389 if (GET_CODE (x) == LO_SUM)
13390 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13391 else
13392 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13393 }
13394 else
13395 output_addr_const (file, x);
13396 }
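
/* For example (illustrative), the address forms above print as:
     (reg 9)                    ->  "0(9)"
     (plus (reg 9) (const 16))  ->  "16(9)"
     (plus (reg 9) (reg 10))    ->  "9,10"
   with the two registers swapped when the first one is r0, since r0
   reads as literal zero in the base position. */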
13397 \f
13398 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13399
13400 bool
13401 rs6000_output_addr_const_extra (FILE *file, rtx x)
13402 {
13403 if (GET_CODE (x) == UNSPEC)
13404 switch (XINT (x, 1))
13405 {
13406 case UNSPEC_TOCREL:
13407 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13408 && REG_P (XVECEXP (x, 0, 1))
13409 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13410 output_addr_const (file, XVECEXP (x, 0, 0));
13411 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13412 {
13413 if (INTVAL (tocrel_offset_oac) >= 0)
13414 fprintf (file, "+");
13415 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13416 }
13417 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13418 {
13419 putc ('-', file);
13420 assemble_name (file, toc_label_name);
13421 need_toc_init = 1;
13422 }
13423 else if (TARGET_ELF)
13424 fputs ("@toc", file);
13425 return true;
13426
13427 #if TARGET_MACHO
13428 case UNSPEC_MACHOPIC_OFFSET:
13429 output_addr_const (file, XVECEXP (x, 0, 0));
13430 putc ('-', file);
13431 machopic_output_function_base_name (file);
13432 return true;
13433 #endif
13434 }
13435 return false;
13436 }
13437 \f
13438 /* Target hook for assembling integer objects. The PowerPC version has
13439 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13440 is defined. It also needs to handle DI-mode objects on 64-bit
13441 targets. */
13442
13443 static bool
13444 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13445 {
13446 #ifdef RELOCATABLE_NEEDS_FIXUP
13447 /* Special handling for SI values. */
13448 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13449 {
13450 static int recurse = 0;
13451
13452 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13453 the .fixup section. Since the TOC section is already relocated, we
13454 don't need to mark it here. We used to skip the text section, but it
13455 should never be valid for relocated addresses to be placed in the text
13456 section. */
13457 if (DEFAULT_ABI == ABI_V4
13458 && (TARGET_RELOCATABLE || flag_pic > 1)
13459 && in_section != toc_section
13460 && !recurse
13461 && !CONST_SCALAR_INT_P (x)
13462 && CONSTANT_P (x))
13463 {
13464 char buf[256];
13465
13466 recurse = 1;
13467 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13468 fixuplabelno++;
13469 ASM_OUTPUT_LABEL (asm_out_file, buf);
13470 fprintf (asm_out_file, "\t.long\t(");
13471 output_addr_const (asm_out_file, x);
13472 fprintf (asm_out_file, ")@fixup\n");
13473 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13474 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13475 fprintf (asm_out_file, "\t.long\t");
13476 assemble_name (asm_out_file, buf);
13477 fprintf (asm_out_file, "\n\t.previous\n");
13478 recurse = 0;
13479 return true;
13480 }
13481 /* Remove initial .'s to turn a -mcall-aixdesc function
13482 address into the address of the descriptor, not the function
13483 itself. */
13484 else if (SYMBOL_REF_P (x)
13485 && XSTR (x, 0)[0] == '.'
13486 && DEFAULT_ABI == ABI_AIX)
13487 {
13488 const char *name = XSTR (x, 0);
13489 while (*name == '.')
13490 name++;
13491
13492 fprintf (asm_out_file, "\t.long\t%s\n", name);
13493 return true;
13494 }
13495 }
13496 #endif /* RELOCATABLE_NEEDS_FIXUP */
13497 return default_assemble_integer (x, size, aligned_p);
13498 }
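
/* For example (illustrative), the fixup entry emitted above for a
   symbol "x" looks roughly like:

	.LCP0:
	.long	(x)@fixup
	.section	".fixup","aw"
	.align	2
	.long	.LCP0
	.previous  */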
13499
13500 /* Return a template string for assembly to emit when making an
13501 external call. FUNOP is the call mem argument operand number. */
13502
13503 static const char *
13504 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
13505 {
13506 /* -Wformat-overflow workaround, without which gcc thinks that %u
13507 might produce 10 digits. */
13508 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13509
13510 char arg[12];
13511 arg[0] = 0;
13512 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13513 {
13514 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13515 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
13516 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13517 sprintf (arg, "(%%&@tlsld)");
13518 }
13519
13520 /* The magic 32768 offset here corresponds to the offset of
13521 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
13522 char z[11];
13523 sprintf (z, "%%z%u%s", funop,
13524 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
13525 ? "+32768" : ""));
13526
13527 static char str[32]; /* 1 spare */
13528 if (rs6000_pcrel_p (cfun))
13529 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
13530 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13531 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13532 sibcall ? "" : "\n\tnop");
13533 else if (DEFAULT_ABI == ABI_V4)
13534 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13535 flag_pic ? "@plt" : "");
13536 #if TARGET_MACHO
13537 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
13538 else if (DEFAULT_ABI == ABI_DARWIN)
13539 {
13540 /* The cookie is in operand func+2. */
13541 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
13542 int cookie = INTVAL (operands[funop + 2]);
13543 if (cookie & CALL_LONG)
13544 {
13545 tree funname = get_identifier (XSTR (operands[funop], 0));
13546 tree labelname = get_prev_label (funname);
13547 gcc_checking_assert (labelname && !sibcall);
13548
13549 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
13550 instruction will reach 'foo', otherwise link as 'bl L42'".
13551 "L42" should be a 'branch island', that will do a far jump to
13552 'foo'. Branch islands are generated in
13553 macho_branch_islands(). */
13554 sprintf (str, "jbsr %%z%u,%.10s", funop,
13555 IDENTIFIER_POINTER (labelname));
13556 }
13557 else
13558 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
13559 after the call. */
13560 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
13561 }
13562 #endif
13563 else
13564 gcc_unreachable ();
13565 return str;
13566 }
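
/* For example (illustrative), the template above yields
   "bl %z0@notoc" for pcrel code, "bl %z0\n\tnop" for ABI_AIX and
   ABI_ELFv2, and "bl %z0@plt" for ABI_V4 with -fpic. */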
13567
13568 const char *
13569 rs6000_call_template (rtx *operands, unsigned int funop)
13570 {
13571 return rs6000_call_template_1 (operands, funop, false);
13572 }
13573
13574 const char *
13575 rs6000_sibcall_template (rtx *operands, unsigned int funop)
13576 {
13577 return rs6000_call_template_1 (operands, funop, true);
13578 }
13579
13580 /* As above, for indirect calls. */
13581
13582 static const char *
13583 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
13584 bool sibcall)
13585 {
13586 /* -Wformat-overflow workaround, without which gcc thinks that %u
13587 might produce 10 digits. Note that -Wformat-overflow will not
13588 currently warn here for str[], so do not rely on a warning to
13589 ensure str[] is correctly sized. */
13590 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13591
13592 /* Currently, funop is either 0 or 1. The maximum string is always
13593 a !speculate 64-bit __tls_get_addr call.
13594
13595 ABI_ELFv2, pcrel:
13596 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13597 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
13598 . 9 crset 2\n\t
13599 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13600 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
13601 . 8 beq%T1l-
13602 .---
13603 .142
13604
13605 ABI_AIX:
13606 . 9 ld 2,%3\n\t
13607 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13608 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13609 . 9 crset 2\n\t
13610 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13611 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13612 . 10 beq%T1l-\n\t
13613 . 10 ld 2,%4(1)
13614 .---
13615 .151
13616
13617 ABI_ELFv2:
13618 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13619 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13620 . 9 crset 2\n\t
13621 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13622 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13623 . 10 beq%T1l-\n\t
13624 . 10 ld 2,%3(1)
13625 .---
13626 .142
13627
13628 ABI_V4:
13629 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13630 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
13631 . 9 crset 2\n\t
13632 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13633 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
13634 . 8 beq%T1l-
13635 .---
13636 .141 */
13637 static char str[160]; /* 8 spare */
13638 char *s = str;
13639 const char *ptrload = TARGET_64BIT ? "d" : "wz";
13640
13641 if (DEFAULT_ABI == ABI_AIX)
13642 s += sprintf (s,
13643 "l%s 2,%%%u\n\t",
13644 ptrload, funop + 3);
13645
13646 /* We don't need the extra code to stop indirect call speculation if
13647 calling via LR. */
13648 bool speculate = (TARGET_MACHO
13649 || rs6000_speculate_indirect_jumps
13650 || (REG_P (operands[funop])
13651 && REGNO (operands[funop]) == LR_REGNO));
13652
13653 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
13654 {
13655 const char *rel64 = TARGET_64BIT ? "64" : "";
13656 char tls[29];
13657 tls[0] = 0;
13658 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13659 {
13660 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13661 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
13662 rel64, funop + 1);
13663 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13664 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
13665 rel64);
13666 }
13667
13668 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
13669 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13670 && flag_pic == 2 ? "+32768" : "");
13671 if (!speculate)
13672 {
13673 s += sprintf (s,
13674 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
13675 tls, rel64, notoc, funop, addend);
13676 s += sprintf (s, "crset 2\n\t");
13677 }
13678 s += sprintf (s,
13679 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
13680 tls, rel64, notoc, funop, addend);
13681 }
13682 else if (!speculate)
13683 s += sprintf (s, "crset 2\n\t");
13684
13685 if (rs6000_pcrel_p (cfun))
13686 {
13687 if (speculate)
13688 sprintf (s, "b%%T%ul", funop);
13689 else
13690 sprintf (s, "beq%%T%ul-", funop);
13691 }
13692 else if (DEFAULT_ABI == ABI_AIX)
13693 {
13694 if (speculate)
13695 sprintf (s,
13696 "b%%T%ul\n\t"
13697 "l%s 2,%%%u(1)",
13698 funop, ptrload, funop + 4);
13699 else
13700 sprintf (s,
13701 "beq%%T%ul-\n\t"
13702 "l%s 2,%%%u(1)",
13703 funop, ptrload, funop + 4);
13704 }
13705 else if (DEFAULT_ABI == ABI_ELFv2)
13706 {
13707 if (speculate)
13708 sprintf (s,
13709 "b%%T%ul\n\t"
13710 "l%s 2,%%%u(1)",
13711 funop, ptrload, funop + 3);
13712 else
13713 sprintf (s,
13714 "beq%%T%ul-\n\t"
13715 "l%s 2,%%%u(1)",
13716 funop, ptrload, funop + 3);
13717 }
13718 else
13719 {
13720 if (speculate)
13721 sprintf (s,
13722 "b%%T%u%s",
13723 funop, sibcall ? "" : "l");
13724 else
13725 sprintf (s,
13726 "beq%%T%u%s-%s",
13727 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
13728 }
13729 return str;
13730 }
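
/* For example (illustrative): under ABI_ELFv2 with
   -mno-speculate-indirect-jumps, a call through CTR comes out as

	crset 2
	beqctrl-
	ld 2,24(1)

   where the crset/beqctrl- pair blocks speculative execution of the
   indirect branch and the ld restores the TOC pointer afterwards. */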
13731
13732 const char *
13733 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
13734 {
13735 return rs6000_indirect_call_template_1 (operands, funop, false);
13736 }
13737
13738 const char *
13739 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
13740 {
13741 return rs6000_indirect_call_template_1 (operands, funop, true);
13742 }
13743
13744 #if HAVE_AS_PLTSEQ
13745 /* Output indirect call insns. WHICH identifies the type of sequence. */
13746 const char *
13747 rs6000_pltseq_template (rtx *operands, int which)
13748 {
13749 const char *rel64 = TARGET_64BIT ? "64" : "";
13750 char tls[30];
13751 tls[0] = 0;
13752 if (GET_CODE (operands[3]) == UNSPEC)
13753 {
13754 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
13755 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
13756 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
13757 off, rel64);
13758 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
13759 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
13760 off, rel64);
13761 }
13762
13763 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
13764 static char str[96]; /* 10 spare */
13765 char off = WORDS_BIG_ENDIAN ? '2' : '4';
13766 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13767 && flag_pic == 2 ? "+32768" : "");
13768 switch (which)
13769 {
13770 case RS6000_PLTSEQ_TOCSAVE:
13771 sprintf (str,
13772 "st%s\n\t"
13773 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
13774 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
13775 tls, rel64);
13776 break;
13777 case RS6000_PLTSEQ_PLT16_HA:
13778 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
13779 sprintf (str,
13780 "lis %%0,0\n\t"
13781 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
13782 tls, off, rel64);
13783 else
13784 sprintf (str,
13785 "addis %%0,%%1,0\n\t"
13786 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
13787 tls, off, rel64, addend);
13788 break;
13789 case RS6000_PLTSEQ_PLT16_LO:
13790 sprintf (str,
13791 "l%s %%0,0(%%1)\n\t"
13792 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
13793 TARGET_64BIT ? "d" : "wz",
13794 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
13795 break;
13796 case RS6000_PLTSEQ_MTCTR:
13797 sprintf (str,
13798 "mtctr %%1\n\t"
13799 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
13800 tls, rel64, addend);
13801 break;
13802 case RS6000_PLTSEQ_PLT_PCREL34:
13803 sprintf (str,
13804 "pl%s %%0,0(0),1\n\t"
13805 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
13806 TARGET_64BIT ? "d" : "wz",
13807 tls, rel64);
13808 break;
13809 default:
13810 gcc_unreachable ();
13811 }
13812 return str;
13813 }
13814 #endif
13815 \f
13816 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
13817 /* Emit an assembler directive to set symbol visibility for DECL to
13818 VISIBILITY_TYPE. */
13819
13820 static void
13821 rs6000_assemble_visibility (tree decl, int vis)
13822 {
13823 if (TARGET_XCOFF)
13824 return;
13825
13826 /* Functions need to have their entry point symbol visibility set as
13827 well as their descriptor symbol visibility. */
13828 if (DEFAULT_ABI == ABI_AIX
13829 && DOT_SYMBOLS
13830 && TREE_CODE (decl) == FUNCTION_DECL)
13831 {
13832 static const char * const visibility_types[] = {
13833 NULL, "protected", "hidden", "internal"
13834 };
13835
13836 const char *name, *type;
13837
13838 name = ((* targetm.strip_name_encoding)
13839 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
13840 type = visibility_types[vis];
13841
13842 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
13843 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
13844 }
13845 else
13846 default_assemble_visibility (decl, vis);
13847 }
13848 #endif
13849 \f
13850 enum rtx_code
13851 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
13852 {
13853 /* Reversing an FP compare needs care -- an ordered compare
13854    becomes an unordered compare and vice versa. */
13855 if (mode == CCFPmode
13856 && (!flag_finite_math_only
13857 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
13858 || code == UNEQ || code == LTGT))
13859 return reverse_condition_maybe_unordered (code);
13860 else
13861 return reverse_condition (code);
13862 }
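
/* For example (illustrative): reversing GE on a CCFPmode comparison
   yields UNLT rather than LT, so a NaN operand still takes the
   reversed branch, matching IEEE semantics. */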
13863
13864 /* Generate a compare for CODE. Return a brand-new rtx that
13865 represents the result of the compare. */
13866
13867 static rtx
13868 rs6000_generate_compare (rtx cmp, machine_mode mode)
13869 {
13870 machine_mode comp_mode;
13871 rtx compare_result;
13872 enum rtx_code code = GET_CODE (cmp);
13873 rtx op0 = XEXP (cmp, 0);
13874 rtx op1 = XEXP (cmp, 1);
13875
13876 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13877 comp_mode = CCmode;
13878 else if (FLOAT_MODE_P (mode))
13879 comp_mode = CCFPmode;
13880 else if (code == GTU || code == LTU
13881 || code == GEU || code == LEU)
13882 comp_mode = CCUNSmode;
13883 else if ((code == EQ || code == NE)
13884 && unsigned_reg_p (op0)
13885 && (unsigned_reg_p (op1)
13886 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
13887 /* These are unsigned values; perhaps there will be a later
13888    ordering compare that can be shared with this one. */
13889 comp_mode = CCUNSmode;
13890 else
13891 comp_mode = CCmode;
13892
13893 /* If we have an unsigned compare, make sure we don't have a signed value as
13894 an immediate. */
13895 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
13896 && INTVAL (op1) < 0)
13897 {
13898 op0 = copy_rtx_if_shared (op0);
13899 op1 = force_reg (GET_MODE (op0), op1);
13900 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
13901 }
13902
13903 /* First, the compare. */
13904 compare_result = gen_reg_rtx (comp_mode);
13905
13906 /* IEEE 128-bit support in VSX registers when we do not have hardware
13907 support. */
13908 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13909 {
13910 rtx libfunc = NULL_RTX;
13911 bool check_nan = false;
13912 rtx dest;
13913
13914 switch (code)
13915 {
13916 case EQ:
13917 case NE:
13918 libfunc = optab_libfunc (eq_optab, mode);
13919 break;
13920
13921 case GT:
13922 case GE:
13923 libfunc = optab_libfunc (ge_optab, mode);
13924 break;
13925
13926 case LT:
13927 case LE:
13928 libfunc = optab_libfunc (le_optab, mode);
13929 break;
13930
13931 case UNORDERED:
13932 case ORDERED:
13933 libfunc = optab_libfunc (unord_optab, mode);
13934 code = (code == UNORDERED) ? NE : EQ;
13935 break;
13936
13937 case UNGE:
13938 case UNGT:
13939 check_nan = true;
13940 libfunc = optab_libfunc (ge_optab, mode);
13941 code = (code == UNGE) ? GE : GT;
13942 break;
13943
13944 case UNLE:
13945 case UNLT:
13946 check_nan = true;
13947 libfunc = optab_libfunc (le_optab, mode);
13948 code = (code == UNLE) ? LE : LT;
13949 break;
13950
13951 case UNEQ:
13952 case LTGT:
13953 check_nan = true;
13954 libfunc = optab_libfunc (eq_optab, mode);
13955 code = (code == UNEQ) ? EQ : NE;
13956 break;
13957
13958 default:
13959 gcc_unreachable ();
13960 }
13961
13962 gcc_assert (libfunc);
13963
13964 if (!check_nan)
13965 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13966 SImode, op0, mode, op1, mode);
13967
13968 /* The library signals an exception for signalling NaNs, so we need to
13969 handle isgreater, etc. by first checking isordered. */
13970 else
13971 {
13972 rtx ne_rtx, normal_dest, unord_dest;
13973 rtx unord_func = optab_libfunc (unord_optab, mode);
13974 rtx join_label = gen_label_rtx ();
13975 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
13976 rtx unord_cmp = gen_reg_rtx (comp_mode);
13977
13978
13979 /* Test for either value being a NaN. */
13980 gcc_assert (unord_func);
13981 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
13982 SImode, op0, mode, op1, mode);
13983
13984 /* Set the result to 1 if either value is a NaN, and jump to the
13985    join label. */
13986 dest = gen_reg_rtx (SImode);
13987 emit_move_insn (dest, const1_rtx);
13988 emit_insn (gen_rtx_SET (unord_cmp,
13989 gen_rtx_COMPARE (comp_mode, unord_dest,
13990 const0_rtx)));
13991
13992 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
13993 emit_jump_insn (gen_rtx_SET (pc_rtx,
13994 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
13995 join_ref,
13996 pc_rtx)));
13997
13998 /* Do the normal comparison, knowing that the values are not
13999 NaNs. */
14000 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14001 SImode, op0, mode, op1, mode);
14002
14003 emit_insn (gen_cstoresi4 (dest,
14004 gen_rtx_fmt_ee (code, SImode, normal_dest,
14005 const0_rtx),
14006 normal_dest, const0_rtx));
14007
14008 /* Join NaN and non-NaN paths. Compare dest against 0. */
14009 emit_label (join_label);
14010 code = NE;
14011 }
14012
14013 emit_insn (gen_rtx_SET (compare_result,
14014 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14015 }
14016
14017 else
14018 {
14019 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14020 CLOBBERs to match cmptf_internal2 pattern. */
14021 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
14022 && FLOAT128_IBM_P (GET_MODE (op0))
14023 && TARGET_HARD_FLOAT)
14024 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14025 gen_rtvec (10,
14026 gen_rtx_SET (compare_result,
14027 gen_rtx_COMPARE (comp_mode, op0, op1)),
14028 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14029 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14030 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14031 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14032 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14033 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14034 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14035 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14036 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
14037 else if (GET_CODE (op1) == UNSPEC
14038 && XINT (op1, 1) == UNSPEC_SP_TEST)
14039 {
14040 rtx op1b = XVECEXP (op1, 0, 0);
14041 comp_mode = CCEQmode;
14042 compare_result = gen_reg_rtx (CCEQmode);
14043 if (TARGET_64BIT)
14044 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
14045 else
14046 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
14047 }
14048 else
14049 emit_insn (gen_rtx_SET (compare_result,
14050 gen_rtx_COMPARE (comp_mode, op0, op1)));
14051 }
14052
14053 validate_condition_mode (code, GET_MODE (compare_result));
14054
14055 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
14056 }
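
/* For example (illustrative): for (gtu (reg:DI 3) (reg:DI 4)) the code
   above selects CCUNSmode, emits the unsigned compare, and returns
   (gtu (reg:CCUNS <n>) (const_int 0)) for the branch or scc expander
   to consume. */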
14057
14058 \f
14059 /* Return the diagnostic message string if the binary operation OP is
14060 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14061
14062 static const char*
14063 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
14064 const_tree type1,
14065 const_tree type2)
14066 {
14067 machine_mode mode1 = TYPE_MODE (type1);
14068 machine_mode mode2 = TYPE_MODE (type2);
14069
14070 /* For complex modes, use the inner type. */
14071 if (COMPLEX_MODE_P (mode1))
14072 mode1 = GET_MODE_INNER (mode1);
14073
14074 if (COMPLEX_MODE_P (mode2))
14075 mode2 = GET_MODE_INNER (mode2);
14076
14077 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14078 double to intermix unless -mfloat128-convert. */
14079 if (mode1 == mode2)
14080 return NULL;
14081
14082 if (!TARGET_FLOAT128_CVT)
14083 {
14084 if ((mode1 == KFmode && mode2 == IFmode)
14085 || (mode1 == IFmode && mode2 == KFmode))
14086 return N_("__float128 and __ibm128 cannot be used in the same "
14087 "expression");
14088
14089 if (TARGET_IEEEQUAD
14090 && ((mode1 == IFmode && mode2 == TFmode)
14091 || (mode1 == TFmode && mode2 == IFmode)))
14092 return N_("__ibm128 and long double cannot be used in the same "
14093 "expression");
14094
14095 if (!TARGET_IEEEQUAD
14096 && ((mode1 == KFmode && mode2 == TFmode)
14097 || (mode1 == TFmode && mode2 == KFmode)))
14098 return N_("__float128 and long double cannot be used in the same "
14099 "expression");
14100 }
14101
14102 return NULL;
14103 }
14104
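/* Editor's note: a minimal sketch (not part of the build) of user code that
   trips the mixed-mode diagnostics above when compiled without
   -mfloat128-convert; the variable names are hypothetical.

       __float128 qf;    // KFmode
       __ibm128   ed;    // IFmode

       // error: __float128 and __ibm128 cannot be used in the same expression
       // qf + ed;

   With -mfloat128-convert the function returns NULL and the mix is
   accepted, with the operands converted to a common mode.  */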
14105 \f
14106 /* Expand floating point conversion to/from __float128 and __ibm128. */
14107
14108 void
14109 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
14110 {
14111 machine_mode dest_mode = GET_MODE (dest);
14112 machine_mode src_mode = GET_MODE (src);
14113 convert_optab cvt = unknown_optab;
14114 bool do_move = false;
14115 rtx libfunc = NULL_RTX;
14116 rtx dest2;
14117 typedef rtx (*rtx_2func_t) (rtx, rtx);
14118 rtx_2func_t hw_convert = (rtx_2func_t)0;
14119 size_t kf_or_tf;
14120
14121 struct hw_conv_t {
14122 rtx_2func_t from_df;
14123 rtx_2func_t from_sf;
14124 rtx_2func_t from_si_sign;
14125 rtx_2func_t from_si_uns;
14126 rtx_2func_t from_di_sign;
14127 rtx_2func_t from_di_uns;
14128 rtx_2func_t to_df;
14129 rtx_2func_t to_sf;
14130 rtx_2func_t to_si_sign;
14131 rtx_2func_t to_si_uns;
14132 rtx_2func_t to_di_sign;
14133 rtx_2func_t to_di_uns;
14134 } hw_conversions[2] = {
14135 /* conversions to/from KFmode */
14136 {
14137 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
14138 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
14139 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
14140 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
14141 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
14142 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
14143 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
14144 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
14145 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
14146 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
14147 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
14148 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
14149 },
14150
14151 /* conversions to/from TFmode */
14152 {
14153 gen_extenddftf2_hw, /* TFmode <- DFmode. */
14154 gen_extendsftf2_hw, /* TFmode <- SFmode. */
14155 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
14156 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
14157 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
14158 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
14159 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
14160 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
14161 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
14162 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
14163 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
14164 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
14165 },
14166 };
14167
14168 if (dest_mode == src_mode)
14169 gcc_unreachable ();
14170
14171 /* Eliminate memory operations. */
14172 if (MEM_P (src))
14173 src = force_reg (src_mode, src);
14174
14175 if (MEM_P (dest))
14176 {
14177 rtx tmp = gen_reg_rtx (dest_mode);
14178 rs6000_expand_float128_convert (tmp, src, unsigned_p);
14179 rs6000_emit_move (dest, tmp, dest_mode);
14180 return;
14181 }
14182
14183 /* Convert to IEEE 128-bit floating point. */
14184 if (FLOAT128_IEEE_P (dest_mode))
14185 {
14186 if (dest_mode == KFmode)
14187 kf_or_tf = 0;
14188 else if (dest_mode == TFmode)
14189 kf_or_tf = 1;
14190 else
14191 gcc_unreachable ();
14192
14193 switch (src_mode)
14194 {
14195 case E_DFmode:
14196 cvt = sext_optab;
14197 hw_convert = hw_conversions[kf_or_tf].from_df;
14198 break;
14199
14200 case E_SFmode:
14201 cvt = sext_optab;
14202 hw_convert = hw_conversions[kf_or_tf].from_sf;
14203 break;
14204
14205 case E_KFmode:
14206 case E_IFmode:
14207 case E_TFmode:
14208 if (FLOAT128_IBM_P (src_mode))
14209 cvt = sext_optab;
14210 else
14211 do_move = true;
14212 break;
14213
14214 case E_SImode:
14215 if (unsigned_p)
14216 {
14217 cvt = ufloat_optab;
14218 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
14219 }
14220 else
14221 {
14222 cvt = sfloat_optab;
14223 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
14224 }
14225 break;
14226
14227 case E_DImode:
14228 if (unsigned_p)
14229 {
14230 cvt = ufloat_optab;
14231 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
14232 }
14233 else
14234 {
14235 cvt = sfloat_optab;
14236 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
14237 }
14238 break;
14239
14240 default:
14241 gcc_unreachable ();
14242 }
14243 }
14244
14245 /* Convert from IEEE 128-bit floating point. */
14246 else if (FLOAT128_IEEE_P (src_mode))
14247 {
14248 if (src_mode == KFmode)
14249 kf_or_tf = 0;
14250 else if (src_mode == TFmode)
14251 kf_or_tf = 1;
14252 else
14253 gcc_unreachable ();
14254
14255 switch (dest_mode)
14256 {
14257 case E_DFmode:
14258 cvt = trunc_optab;
14259 hw_convert = hw_conversions[kf_or_tf].to_df;
14260 break;
14261
14262 case E_SFmode:
14263 cvt = trunc_optab;
14264 hw_convert = hw_conversions[kf_or_tf].to_sf;
14265 break;
14266
14267 case E_KFmode:
14268 case E_IFmode:
14269 case E_TFmode:
14270 if (FLOAT128_IBM_P (dest_mode))
14271 cvt = trunc_optab;
14272 else
14273 do_move = true;
14274 break;
14275
14276 case E_SImode:
14277 if (unsigned_p)
14278 {
14279 cvt = ufix_optab;
14280 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
14281 }
14282 else
14283 {
14284 cvt = sfix_optab;
14285 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
14286 }
14287 break;
14288
14289 case E_DImode:
14290 if (unsigned_p)
14291 {
14292 cvt = ufix_optab;
14293 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
14294 }
14295 else
14296 {
14297 cvt = sfix_optab;
14298 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
14299 }
14300 break;
14301
14302 default:
14303 gcc_unreachable ();
14304 }
14305 }
14306
14307 /* Both IBM format. */
14308 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
14309 do_move = true;
14310
14311 else
14312 gcc_unreachable ();
14313
14314 /* Handle conversion between TFmode/KFmode/IFmode. */
14315 if (do_move)
14316 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
14317
14318 /* Handle conversion if we have hardware support. */
14319 else if (TARGET_FLOAT128_HW && hw_convert)
14320 emit_insn ((hw_convert) (dest, src));
14321
14322 /* Call an external function to do the conversion. */
14323 else if (cvt != unknown_optab)
14324 {
14325 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
14326 gcc_assert (libfunc != NULL_RTX);
14327
14328 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
14329 src, src_mode);
14330
14331 gcc_assert (dest2 != NULL_RTX);
14332 if (!rtx_equal_p (dest, dest2))
14333 emit_move_insn (dest, dest2);
14334 }
14335
14336 else
14337 gcc_unreachable ();
14338
14339 return;
14340 }
14341
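/* Editor's note: a worked example of the dispatch above, assuming a DFmode
   source and a KFmode destination.  kf_or_tf is 0, cvt is sext_optab, and
   hw_convert is hw_conversions[0].from_df (gen_extenddfkf2_hw).  With
   -mfloat128-hardware the single hw_convert insn is emitted; otherwise the
   sext_optab libfunc is called, which for this pair is normally the libgcc
   routine __extenddfkf2 (name as registered by the float128 libfunc setup
   elsewhere in this port).  */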
14342 \f
14343 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14344 can be used as that dest register. Return the dest register. */
14345
14346 rtx
14347 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14348 {
14349 if (op2 == const0_rtx)
14350 return op1;
14351
14352 if (GET_CODE (scratch) == SCRATCH)
14353 scratch = gen_reg_rtx (mode);
14354
14355 if (logical_operand (op2, mode))
14356 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14357 else
14358 emit_insn (gen_rtx_SET (scratch,
14359 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14360
14361 return scratch;
14362 }
14363
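/* Editor's note: a sketch of what rs6000_emit_eqne produces.  For an
   equality test r3 == r4 (GPR numbers hypothetical), OP2 is not zero, so
   the helper emits either

       xor  scratch,r3,r4       ; when OP2 is a logical_operand
   or
       addi scratch,r3,-N       ; when OP2 is the constant N

   and returns SCRATCH, which is zero exactly when the operands were equal;
   the caller then compares SCRATCH against zero.  */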
14364 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14365 requires this. The result is mode MODE. */
14366 rtx
14367 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
14368 {
14369 rtx cond[2];
14370 int n = 0;
14371 if (code == LTGT || code == LE || code == UNLT)
14372 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
14373 if (code == LTGT || code == GE || code == UNGT)
14374 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
14375 if (code == LE || code == GE || code == UNEQ)
14376 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
14377 if (code == UNLT || code == UNGT || code == UNEQ)
14378 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
14379
14380 gcc_assert (n == 2);
14381
14382 rtx cc = gen_reg_rtx (CCEQmode);
14383 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
14384 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
14385
14386 return cc;
14387 }
14388
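/* Editor's note: an illustration of the cror combination above.  For
   code == LE the two conditions are LT and EQ, and the emitted sequence is
   conceptually

       fcmpu  cr0,fa,fb                       ; done earlier, sets cr0 bits
       cror   eq-bit(crN),lt-bit(cr0),eq-bit(cr0)

   so the CCEQmode result bit is the OR of the two CR bits; the bit
   positions shown are schematic rather than literal operands.  */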
14389 void
14390 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
14391 {
14392 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
14393 rtx_code cond_code = GET_CODE (condition_rtx);
14394
14395 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
14396 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
14397 ;
14398 else if (cond_code == NE
14399 || cond_code == GE || cond_code == LE
14400 || cond_code == GEU || cond_code == LEU
14401 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
14402 {
14403 rtx not_result = gen_reg_rtx (CCEQmode);
14404 rtx not_op, rev_cond_rtx;
14405 machine_mode cc_mode;
14406
14407 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
14408
14409 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
14410 SImode, XEXP (condition_rtx, 0), const0_rtx);
14411 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
14412 emit_insn (gen_rtx_SET (not_result, not_op));
14413 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
14414 }
14415
14416 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
14417 if (op_mode == VOIDmode)
14418 op_mode = GET_MODE (XEXP (operands[1], 1));
14419
14420 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
14421 {
14422 PUT_MODE (condition_rtx, DImode);
14423 convert_move (operands[0], condition_rtx, 0);
14424 }
14425 else
14426 {
14427 PUT_MODE (condition_rtx, SImode);
14428 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
14429 }
14430 }
14431
14432 /* Emit a conditional branch to the label in OPERANDS[3], testing the comparison in OPERANDS[0]. */
14433
14434 void
14435 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14436 {
14437 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
14438 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14439 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
14440 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
14441 }
14442
14443 /* Return the string to output a conditional branch to LABEL, which is
14444 the operand template of the label, or NULL if the branch is really a
14445 conditional return.
14446
14447 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14448 condition code register and its mode specifies what kind of
14449 comparison we made.
14450
14451 REVERSED is nonzero if we should reverse the sense of the comparison.
14452
14453 INSN is the insn. */
14454
14455 char *
14456 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
14457 {
14458 static char string[64];
14459 enum rtx_code code = GET_CODE (op);
14460 rtx cc_reg = XEXP (op, 0);
14461 machine_mode mode = GET_MODE (cc_reg);
14462 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
14463 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
14464 int really_reversed = reversed ^ need_longbranch;
14465 char *s = string;
14466 const char *ccode;
14467 const char *pred;
14468 rtx note;
14469
14470 validate_condition_mode (code, mode);
14471
14472 /* Work out which way this really branches. We could always use
14473 reverse_condition_maybe_unordered here, but keeping the two cases
14474 separate makes the resulting assembler clearer. */
14475 if (really_reversed)
14476 {
14477 /* Reversal of FP compares takes care -- an ordered compare
14478 becomes an unordered compare and vice versa. */
14479 if (mode == CCFPmode)
14480 code = reverse_condition_maybe_unordered (code);
14481 else
14482 code = reverse_condition (code);
14483 }
14484
14485 switch (code)
14486 {
14487 /* Not all of these are actually distinct opcodes, but
14488 we distinguish them for clarity of the resulting assembler. */
14489 case NE: case LTGT:
14490 ccode = "ne"; break;
14491 case EQ: case UNEQ:
14492 ccode = "eq"; break;
14493 case GE: case GEU:
14494 ccode = "ge"; break;
14495 case GT: case GTU: case UNGT:
14496 ccode = "gt"; break;
14497 case LE: case LEU:
14498 ccode = "le"; break;
14499 case LT: case LTU: case UNLT:
14500 ccode = "lt"; break;
14501 case UNORDERED: ccode = "un"; break;
14502 case ORDERED: ccode = "nu"; break;
14503 case UNGE: ccode = "nl"; break;
14504 case UNLE: ccode = "ng"; break;
14505 default:
14506 gcc_unreachable ();
14507 }
14508
14509 /* Maybe we have a guess as to how likely the branch is. */
14510 pred = "";
14511 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
14512 if (note != NULL_RTX)
14513 {
14514 /* PROB is the difference from 50%. */
14515 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
14516 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
14517
14518 /* Only hint for highly probable/improbable branches on newer cpus when
14519 we have real profile data, as static prediction overrides processor
14520 dynamic prediction. For older cpus we may as well always hint, but
14521 assume not taken for branches that are very close to 50% as a
14522 mispredicted taken branch is more expensive than a
14523 mispredicted not-taken branch. */
14524 if (rs6000_always_hint
14525 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
14526 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
14527 && br_prob_note_reliable_p (note)))
14528 {
14529 if (abs (prob) > REG_BR_PROB_BASE / 20
14530 && ((prob > 0) ^ need_longbranch))
14531 pred = "+";
14532 else
14533 pred = "-";
14534 }
14535 }
14536
14537 if (label == NULL)
14538 s += sprintf (s, "b%slr%s ", ccode, pred);
14539 else
14540 s += sprintf (s, "b%s%s ", ccode, pred);
14541
14542 /* We need to escape any '%' characters in the reg_names string.
14543 Assume they'd only be the first character.... */
14544 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
14545 *s++ = '%';
14546 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
14547
14548 if (label != NULL)
14549 {
14550 /* If the branch distance was too far, we may have to use an
14551 unconditional branch to go the distance. */
14552 if (need_longbranch)
14553 s += sprintf (s, ",$+8\n\tb %s", label);
14554 else
14555 s += sprintf (s, ",%s", label);
14556 }
14557
14558 return string;
14559 }
14560
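/* Editor's note: sample outputs from output_cbranch, assuming -mregnames
   and a label operand already rendered as ".L5" (both hypothetical).  A
   short forward branch hinted taken might come out as

       beq+ cr0,.L5

   while a branch beyond the conditional-branch range takes the long-branch
   form, with the condition reversed around an unconditional branch:

       bne cr0,$+8
       b .L5                                                             */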
14561 /* Return insn for VSX or Altivec comparisons. */
14562
14563 static rtx
14564 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
14565 {
14566 rtx mask;
14567 machine_mode mode = GET_MODE (op0);
14568
14569 switch (code)
14570 {
14571 default:
14572 break;
14573
14574 case GE:
14575 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14576 return NULL_RTX;
14577 /* FALLTHRU */
14578
14579 case EQ:
14580 case GT:
14581 case GTU:
14582 case ORDERED:
14583 case UNORDERED:
14584 case UNEQ:
14585 case LTGT:
14586 mask = gen_reg_rtx (mode);
14587 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
14588 return mask;
14589 }
14590
14591 return NULL_RTX;
14592 }
14593
14594 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
14595 DMODE is the expected destination mode. This is a recursive function. */
14596
14597 static rtx
14598 rs6000_emit_vector_compare (enum rtx_code rcode,
14599 rtx op0, rtx op1,
14600 machine_mode dmode)
14601 {
14602 rtx mask;
14603 bool swap_operands = false;
14604 bool try_again = false;
14605
14606 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
14607 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
14608
14609 /* See if the comparison works as is. */
14610 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14611 if (mask)
14612 return mask;
14613
14614 switch (rcode)
14615 {
14616 case LT:
14617 rcode = GT;
14618 swap_operands = true;
14619 try_again = true;
14620 break;
14621 case LTU:
14622 rcode = GTU;
14623 swap_operands = true;
14624 try_again = true;
14625 break;
14626 case NE:
14627 case UNLE:
14628 case UNLT:
14629 case UNGE:
14630 case UNGT:
14631 /* Invert condition and try again.
14632 e.g., A != B becomes ~(A==B). */
14633 {
14634 enum rtx_code rev_code;
14635 enum insn_code nor_code;
14636 rtx mask2;
14637
14638 rev_code = reverse_condition_maybe_unordered (rcode);
14639 if (rev_code == UNKNOWN)
14640 return NULL_RTX;
14641
14642 nor_code = optab_handler (one_cmpl_optab, dmode);
14643 if (nor_code == CODE_FOR_nothing)
14644 return NULL_RTX;
14645
14646 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
14647 if (!mask2)
14648 return NULL_RTX;
14649
14650 mask = gen_reg_rtx (dmode);
14651 emit_insn (GEN_FCN (nor_code) (mask, mask2));
14652 return mask;
14653 }
14654 break;
14655 case GE:
14656 case GEU:
14657 case LE:
14658 case LEU:
14659 /* Try GT/GTU/LT/LTU OR EQ */
14660 {
14661 rtx c_rtx, eq_rtx;
14662 enum insn_code ior_code;
14663 enum rtx_code new_code;
14664
14665 switch (rcode)
14666 {
14667 case GE:
14668 new_code = GT;
14669 break;
14670
14671 case GEU:
14672 new_code = GTU;
14673 break;
14674
14675 case LE:
14676 new_code = LT;
14677 break;
14678
14679 case LEU:
14680 new_code = LTU;
14681 break;
14682
14683 default:
14684 gcc_unreachable ();
14685 }
14686
14687 ior_code = optab_handler (ior_optab, dmode);
14688 if (ior_code == CODE_FOR_nothing)
14689 return NULL_RTX;
14690
14691 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
14692 if (!c_rtx)
14693 return NULL_RTX;
14694
14695 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
14696 if (!eq_rtx)
14697 return NULL_RTX;
14698
14699 mask = gen_reg_rtx (dmode);
14700 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
14701 return mask;
14702 }
14703 break;
14704 default:
14705 return NULL_RTX;
14706 }
14707
14708 if (try_again)
14709 {
14710 if (swap_operands)
14711 std::swap (op0, op1);
14712
14713 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14714 if (mask)
14715 return mask;
14716 }
14717
14718 /* You only get two chances. */
14719 return NULL_RTX;
14720 }
14721
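/* Editor's note: a worked trace of the recursion above.  A V4SI "a <= b"
   (LE) has no direct AltiVec/VSX pattern, so it is rewritten as
   (a < b) OR (a == b): two inner compares plus a vector OR, where the
   inner LT is itself retried as GT with the operands swapped.  Similarly
   "a != b" (NE) becomes NOT (a == b).  */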
14722 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
14723 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
14724 operands for the relation operation COND. */
14725
14726 int
14727 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
14728 rtx cond, rtx cc_op0, rtx cc_op1)
14729 {
14730 machine_mode dest_mode = GET_MODE (dest);
14731 machine_mode mask_mode = GET_MODE (cc_op0);
14732 enum rtx_code rcode = GET_CODE (cond);
14733 machine_mode cc_mode = CCmode;
14734 rtx mask;
14735 rtx cond2;
14736 bool invert_move = false;
14737
14738 if (VECTOR_UNIT_NONE_P (dest_mode))
14739 return 0;
14740
14741 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
14742 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
14743
14744 switch (rcode)
14745 {
14746 /* For tests we cannot emit directly, invert the condition and swap
14747 the true/false arms of the move instead. */
14748 case NE:
14749 case UNLE:
14750 case UNLT:
14751 case UNGE:
14752 case UNGT:
14753 /* Invert condition and try again.
14754 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
14755 invert_move = true;
14756 rcode = reverse_condition_maybe_unordered (rcode);
14757 if (rcode == UNKNOWN)
14758 return 0;
14759 break;
14760
14761 case GE:
14762 case LE:
14763 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
14764 {
14765 /* Invert condition to avoid compound test. */
14766 invert_move = true;
14767 rcode = reverse_condition (rcode);
14768 }
14769 break;
14770
14771 case GTU:
14772 case GEU:
14773 case LTU:
14774 case LEU:
14775 /* Mark unsigned tests with CCUNSmode. */
14776 cc_mode = CCUNSmode;
14777
14778 /* Invert condition to avoid compound test if necessary. */
14779 if (rcode == GEU || rcode == LEU)
14780 {
14781 invert_move = true;
14782 rcode = reverse_condition (rcode);
14783 }
14784 break;
14785
14786 default:
14787 break;
14788 }
14789
14790 /* Get the vector mask for the given relational operations. */
14791 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
14792
14793 if (!mask)
14794 return 0;
14795
14796 if (invert_move)
14797 std::swap (op_true, op_false);
14798
14799 /* Optimize constant TRUE/FALSE arms, exploiting that the compare mask elements are -1 or 0. */
14800 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
14801 && (GET_CODE (op_true) == CONST_VECTOR
14802 || GET_CODE (op_false) == CONST_VECTOR))
14803 {
14804 rtx constant_0 = CONST0_RTX (dest_mode);
14805 rtx constant_m1 = CONSTM1_RTX (dest_mode);
14806
14807 if (op_true == constant_m1 && op_false == constant_0)
14808 {
14809 emit_move_insn (dest, mask);
14810 return 1;
14811 }
14812
14813 else if (op_true == constant_0 && op_false == constant_m1)
14814 {
14815 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
14816 return 1;
14817 }
14818
14819 /* If we can't use the vector comparison directly, perhaps we can use
14820 the mask for the true or false fields, instead of loading up a
14821 constant. */
14822 if (op_true == constant_m1)
14823 op_true = mask;
14824
14825 if (op_false == constant_0)
14826 op_false = mask;
14827 }
14828
14829 if (!REG_P (op_true) && !SUBREG_P (op_true))
14830 op_true = force_reg (dest_mode, op_true);
14831
14832 if (!REG_P (op_false) && !SUBREG_P (op_false))
14833 op_false = force_reg (dest_mode, op_false);
14834
14835 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
14836 CONST0_RTX (dest_mode));
14837 emit_insn (gen_rtx_SET (dest,
14838 gen_rtx_IF_THEN_ELSE (dest_mode,
14839 cond2,
14840 op_true,
14841 op_false)));
14842 return 1;
14843 }
14844
14845 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
14846 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
14847 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
14848 hardware has no such operation. */
14849
14850 static int
14851 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14852 {
14853 enum rtx_code code = GET_CODE (op);
14854 rtx op0 = XEXP (op, 0);
14855 rtx op1 = XEXP (op, 1);
14856 machine_mode compare_mode = GET_MODE (op0);
14857 machine_mode result_mode = GET_MODE (dest);
14858 bool max_p = false;
14859
14860 if (result_mode != compare_mode)
14861 return 0;
14862
14863 if (code == GE || code == GT)
14864 max_p = true;
14865 else if (code == LE || code == LT)
14866 max_p = false;
14867 else
14868 return 0;
14869
14870 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
14871 ;
14872
14873 /* Only when NaNs and signed zeros are not honored can smax be
14874 used for `op0 < op1 ? op1 : op0`, and smin for
14875 `op0 > op1 ? op1 : op0`. */
14876 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
14877 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
14878 max_p = !max_p;
14879
14880 else
14881 return 0;
14882
14883 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
14884 return 1;
14885 }
14886
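/* Editor's note: the mapping implemented above, in source terms.  With
   -ffast-math (no NaNs, no signed zeros), a DFmode expression such as

       a < b ? b : a      // true_cond == op1, false_cond == op0

   flips max_p and becomes SMAX, typically a single xsmaxcdp, while
   a < b ? a : b likewise becomes SMIN (xsmincdp).  */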
14887 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
14888 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
14889 operands of the last comparison is nonzero/true, FALSE_COND if it is
14890 zero/false. Return 0 if the hardware has no such operation. */
14891
14892 static int
14893 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14894 {
14895 enum rtx_code code = GET_CODE (op);
14896 rtx op0 = XEXP (op, 0);
14897 rtx op1 = XEXP (op, 1);
14898 machine_mode result_mode = GET_MODE (dest);
14899 rtx compare_rtx;
14900 rtx cmove_rtx;
14901 rtx clobber_rtx;
14902
14903 if (!can_create_pseudo_p ())
14904 return 0;
14905
14906 switch (code)
14907 {
14908 case EQ:
14909 case GE:
14910 case GT:
14911 break;
14912
14913 case NE:
14914 case LT:
14915 case LE:
14916 code = swap_condition (code);
14917 std::swap (op0, op1);
14918 break;
14919
14920 default:
14921 return 0;
14922 }
14923
14924 /* Generate: [(parallel [(set (dest)
14925 (if_then_else (op (cmp1) (cmp2))
14926 (true)
14927 (false)))
14928 (clobber (scratch))])]. */
14929
14930 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
14931 cmove_rtx = gen_rtx_SET (dest,
14932 gen_rtx_IF_THEN_ELSE (result_mode,
14933 compare_rtx,
14934 true_cond,
14935 false_cond));
14936
14937 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
14938 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14939 gen_rtvec (2, cmove_rtx, clobber_rtx)));
14940
14941 return 1;
14942 }
14943
14944 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
14945 operands of the last comparison is nonzero/true, FALSE_COND if it
14946 is zero/false. Return 0 if the hardware has no such operation. */
14947
14948 int
14949 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14950 {
14951 enum rtx_code code = GET_CODE (op);
14952 rtx op0 = XEXP (op, 0);
14953 rtx op1 = XEXP (op, 1);
14954 machine_mode compare_mode = GET_MODE (op0);
14955 machine_mode result_mode = GET_MODE (dest);
14956 rtx temp;
14957 bool is_against_zero;
14958
14959 /* These modes should always match. */
14960 if (GET_MODE (op1) != compare_mode
14961 /* In the isel case however, we can use a compare immediate, so
14962 op1 may be a small constant. */
14963 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
14964 return 0;
14965 if (GET_MODE (true_cond) != result_mode)
14966 return 0;
14967 if (GET_MODE (false_cond) != result_mode)
14968 return 0;
14969
14970 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
14971 if (TARGET_P9_MINMAX
14972 && (compare_mode == SFmode || compare_mode == DFmode)
14973 && (result_mode == SFmode || result_mode == DFmode))
14974 {
14975 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
14976 return 1;
14977
14978 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
14979 return 1;
14980 }
14981
14982 /* Don't allow using floating point comparisons for integer results for
14983 now. */
14984 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
14985 return 0;
14986
14987 /* First, work out if the hardware can do this at all, or
14988 if it's too slow.... */
14989 if (!FLOAT_MODE_P (compare_mode))
14990 {
14991 if (TARGET_ISEL)
14992 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
14993 return 0;
14994 }
14995
14996 is_against_zero = op1 == CONST0_RTX (compare_mode);
14997
14998 /* A floating-point subtract might overflow, underflow, or produce
14999 an inexact result, thus changing the floating-point flags, so it
15000 can't be generated if we care about that. It's safe if one side
15001 of the construct is zero, since then no subtract will be
15002 generated. */
15003 if (SCALAR_FLOAT_MODE_P (compare_mode)
15004 && flag_trapping_math && ! is_against_zero)
15005 return 0;
15006
15007 /* Eliminate half of the comparisons by switching operands, this
15008 makes the remaining code simpler. */
15009 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
15010 || code == LTGT || code == LT || code == UNLE)
15011 {
15012 code = reverse_condition_maybe_unordered (code);
15013 temp = true_cond;
15014 true_cond = false_cond;
15015 false_cond = temp;
15016 }
15017
15018 /* UNEQ and LTGT take four instructions for a comparison with zero,
15019 so it'll probably be faster to use a branch here too. */
15020 if (code == UNEQ && HONOR_NANS (compare_mode))
15021 return 0;
15022
15023 /* We're going to try to implement comparisons by performing
15024 a subtract, then comparing against zero. Unfortunately,
15025 Inf - Inf is NaN which is not zero, and so if we don't
15026 know that the operand is finite and the comparison
15027 would treat EQ differently from UNORDERED, we can't do it. */
15028 if (HONOR_INFINITIES (compare_mode)
15029 && code != GT && code != UNGE
15030 && (!CONST_DOUBLE_P (op1)
15031 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
15032 /* Constructs of the form (a OP b ? a : b) are safe. */
15033 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
15034 || (! rtx_equal_p (op0, true_cond)
15035 && ! rtx_equal_p (op1, true_cond))))
15036 return 0;
15037
15038 /* At this point we know we can use fsel. */
15039
15040 /* Don't allow compare_mode other than SFmode or DFmode, for others there
15041 is no fsel instruction. */
15042 if (compare_mode != SFmode && compare_mode != DFmode)
15043 return 0;
15044
15045 /* Reduce the comparison to a comparison against zero. */
15046 if (! is_against_zero)
15047 {
15048 temp = gen_reg_rtx (compare_mode);
15049 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
15050 op0 = temp;
15051 op1 = CONST0_RTX (compare_mode);
15052 }
15053
15054 /* If we don't care about NaNs we can reduce some of the comparisons
15055 down to faster ones. */
15056 if (! HONOR_NANS (compare_mode))
15057 switch (code)
15058 {
15059 case GT:
15060 code = LE;
15061 temp = true_cond;
15062 true_cond = false_cond;
15063 false_cond = temp;
15064 break;
15065 case UNGE:
15066 code = GE;
15067 break;
15068 case UNEQ:
15069 code = EQ;
15070 break;
15071 default:
15072 break;
15073 }
15074
15075 /* Now, reduce everything down to a GE. */
15076 switch (code)
15077 {
15078 case GE:
15079 break;
15080
15081 case LE:
15082 temp = gen_reg_rtx (compare_mode);
15083 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15084 op0 = temp;
15085 break;
15086
15087 case ORDERED:
15088 temp = gen_reg_rtx (compare_mode);
15089 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
15090 op0 = temp;
15091 break;
15092
15093 case EQ:
15094 temp = gen_reg_rtx (compare_mode);
15095 emit_insn (gen_rtx_SET (temp,
15096 gen_rtx_NEG (compare_mode,
15097 gen_rtx_ABS (compare_mode, op0))));
15098 op0 = temp;
15099 break;
15100
15101 case UNGE:
15102 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15103 temp = gen_reg_rtx (result_mode);
15104 emit_insn (gen_rtx_SET (temp,
15105 gen_rtx_IF_THEN_ELSE (result_mode,
15106 gen_rtx_GE (VOIDmode,
15107 op0, op1),
15108 true_cond, false_cond)));
15109 false_cond = true_cond;
15110 true_cond = temp;
15111
15112 temp = gen_reg_rtx (compare_mode);
15113 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15114 op0 = temp;
15115 break;
15116
15117 case GT:
15118 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15119 temp = gen_reg_rtx (result_mode);
15120 emit_insn (gen_rtx_SET (temp,
15121 gen_rtx_IF_THEN_ELSE (result_mode,
15122 gen_rtx_GE (VOIDmode,
15123 op0, op1),
15124 true_cond, false_cond)));
15125 true_cond = false_cond;
15126 false_cond = temp;
15127
15128 temp = gen_reg_rtx (compare_mode);
15129 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15130 op0 = temp;
15131 break;
15132
15133 default:
15134 gcc_unreachable ();
15135 }
15136
15137 emit_insn (gen_rtx_SET (dest,
15138 gen_rtx_IF_THEN_ELSE (result_mode,
15139 gen_rtx_GE (VOIDmode,
15140 op0, op1),
15141 true_cond, false_cond)));
15142 return 1;
15143 }
15144
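/* Editor's note: a sketch of the fsel lowering performed above, assuming
   DFmode and a comparison not already against zero:

       a >= b ? x : y

   is reduced to a subtraction plus one conditional move,

       fsub  tmp,a,b
       fsel  dest,tmp,x,y       ; dest = (tmp >= 0.0) ? x : y

   which is exactly why the earlier guards reject the transformation when
   the subtraction could trap or when Inf - Inf (a NaN) could change the
   EQ/UNORDERED outcome.  */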
15145 /* Same as above, but for ints (isel). */
15146
15147 int
15148 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15149 {
15150 rtx condition_rtx, cr;
15151 machine_mode mode = GET_MODE (dest);
15152 enum rtx_code cond_code;
15153 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15154 bool signedp;
15155
15156 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15157 return 0;
15158
15159 /* We still have to do the compare, because isel doesn't do a
15160 compare; it just looks at the CRx bits set by a previous compare
15161 instruction. */
15162 condition_rtx = rs6000_generate_compare (op, mode);
15163 cond_code = GET_CODE (condition_rtx);
15164 cr = XEXP (condition_rtx, 0);
15165 signedp = GET_MODE (cr) == CCmode;
15166
15167 isel_func = (mode == SImode
15168 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15169 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15170
15171 switch (cond_code)
15172 {
15173 case LT: case GT: case LTU: case GTU: case EQ:
15174 /* isel handles these directly. */
15175 break;
15176
15177 default:
15178 /* We need to swap the sense of the comparison. */
15179 {
15180 std::swap (false_cond, true_cond);
15181 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15182 }
15183 break;
15184 }
15185
15186 false_cond = force_reg (mode, false_cond);
15187 if (true_cond != const0_rtx)
15188 true_cond = force_reg (mode, true_cond);
15189
15190 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15191
15192 return 1;
15193 }
15194
15195 void
15196 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15197 {
15198 machine_mode mode = GET_MODE (op0);
15199 enum rtx_code c;
15200 rtx target;
15201
15202 /* VSX/altivec have direct min/max insns. */
15203 if ((code == SMAX || code == SMIN)
15204 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15205 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15206 {
15207 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15208 return;
15209 }
15210
15211 if (code == SMAX || code == SMIN)
15212 c = GE;
15213 else
15214 c = GEU;
15215
15216 if (code == SMAX || code == UMAX)
15217 target = emit_conditional_move (dest, c, op0, op1, mode,
15218 op0, op1, mode, 0);
15219 else
15220 target = emit_conditional_move (dest, c, op0, op1, mode,
15221 op1, op0, mode, 0);
15222 gcc_assert (target);
15223 if (target != dest)
15224 emit_move_insn (dest, target);
15225 }
15226
15227 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15228 COND is true. Mark the jump as unlikely to be taken. */
15229
15230 static void
15231 emit_unlikely_jump (rtx cond, rtx label)
15232 {
15233 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15234 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15235 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15236 }
15237
15238 /* A subroutine of the atomic operation splitters. Emit a load-locked
15239 instruction in MODE. For QI/HImode, possibly use a pattern that includes
15240 the zero_extend operation. */
15241
15242 static void
15243 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15244 {
15245 rtx (*fn) (rtx, rtx) = NULL;
15246
15247 switch (mode)
15248 {
15249 case E_QImode:
15250 fn = gen_load_lockedqi;
15251 break;
15252 case E_HImode:
15253 fn = gen_load_lockedhi;
15254 break;
15255 case E_SImode:
15256 if (GET_MODE (mem) == QImode)
15257 fn = gen_load_lockedqi_si;
15258 else if (GET_MODE (mem) == HImode)
15259 fn = gen_load_lockedhi_si;
15260 else
15261 fn = gen_load_lockedsi;
15262 break;
15263 case E_DImode:
15264 fn = gen_load_lockeddi;
15265 break;
15266 case E_TImode:
15267 fn = gen_load_lockedti;
15268 break;
15269 default:
15270 gcc_unreachable ();
15271 }
15272 emit_insn (fn (reg, mem));
15273 }
15274
15275 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15276 instruction in MODE. */
15277
15278 static void
15279 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15280 {
15281 rtx (*fn) (rtx, rtx, rtx) = NULL;
15282
15283 switch (mode)
15284 {
15285 case E_QImode:
15286 fn = gen_store_conditionalqi;
15287 break;
15288 case E_HImode:
15289 fn = gen_store_conditionalhi;
15290 break;
15291 case E_SImode:
15292 fn = gen_store_conditionalsi;
15293 break;
15294 case E_DImode:
15295 fn = gen_store_conditionaldi;
15296 break;
15297 case E_TImode:
15298 fn = gen_store_conditionalti;
15299 break;
15300 default:
15301 gcc_unreachable ();
15302 }
15303
15304 /* Emit sync before stwcx. to address PPC405 Erratum. */
15305 if (PPC405_ERRATUM77)
15306 emit_insn (gen_hwsync ());
15307
15308 emit_insn (fn (res, mem, val));
15309 }
15310
15311 /* Expand barriers before and after a load_locked/store_cond sequence. */
15312
15313 static rtx
15314 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15315 {
15316 rtx addr = XEXP (mem, 0);
15317
15318 if (!legitimate_indirect_address_p (addr, reload_completed)
15319 && !legitimate_indexed_address_p (addr, reload_completed))
15320 {
15321 addr = force_reg (Pmode, addr);
15322 mem = replace_equiv_address_nv (mem, addr);
15323 }
15324
15325 switch (model)
15326 {
15327 case MEMMODEL_RELAXED:
15328 case MEMMODEL_CONSUME:
15329 case MEMMODEL_ACQUIRE:
15330 break;
15331 case MEMMODEL_RELEASE:
15332 case MEMMODEL_ACQ_REL:
15333 emit_insn (gen_lwsync ());
15334 break;
15335 case MEMMODEL_SEQ_CST:
15336 emit_insn (gen_hwsync ());
15337 break;
15338 default:
15339 gcc_unreachable ();
15340 }
15341 return mem;
15342 }
15343
15344 static void
15345 rs6000_post_atomic_barrier (enum memmodel model)
15346 {
15347 switch (model)
15348 {
15349 case MEMMODEL_RELAXED:
15350 case MEMMODEL_CONSUME:
15351 case MEMMODEL_RELEASE:
15352 break;
15353 case MEMMODEL_ACQUIRE:
15354 case MEMMODEL_ACQ_REL:
15355 case MEMMODEL_SEQ_CST:
15356 emit_insn (gen_isync ());
15357 break;
15358 default:
15359 gcc_unreachable ();
15360 }
15361 }
15362
15363 /* A subroutine of the various atomic expanders. For sub-word operations,
15364 we must adjust things to operate on SImode. Given the original MEM,
15365 return a new aligned memory. Also build and return the quantities by
15366 which to shift and mask. */
15367
15368 static rtx
15369 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15370 {
15371 rtx addr, align, shift, mask, mem;
15372 HOST_WIDE_INT shift_mask;
15373 machine_mode mode = GET_MODE (orig_mem);
15374
15375 /* For smaller modes, we have to implement this via SImode. */
15376 shift_mask = (mode == QImode ? 0x18 : 0x10);
15377
15378 addr = XEXP (orig_mem, 0);
15379 addr = force_reg (GET_MODE (addr), addr);
15380
15381 /* Aligned memory containing subword. Generate a new memory. We
15382 do not want any of the existing MEM_ATTR data, as we're now
15383 accessing memory outside the original object. */
15384 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15385 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15386 mem = gen_rtx_MEM (SImode, align);
15387 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15388 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15389 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15390
15391 /* Shift amount for subword relative to aligned word. */
15392 shift = gen_reg_rtx (SImode);
15393 addr = gen_lowpart (SImode, addr);
15394 rtx tmp = gen_reg_rtx (SImode);
15395 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15396 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
15397 if (BYTES_BIG_ENDIAN)
15398 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15399 shift, 1, OPTAB_LIB_WIDEN);
15400 *pshift = shift;
15401
15402 /* Mask for insertion. */
15403 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15404 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15405 *pmask = mask;
15406
15407 return mem;
15408 }
15409
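/* Editor's note: the shift/mask computation above, re-expressed as a plain
   C sketch for clarity (illustrative only; not used anywhere):

       unsigned int shift_mask = qimode_p ? 0x18 : 0x10;
       unsigned int shift = ((unsigned int) addr << 3) & shift_mask;
       if (BYTES_BIG_ENDIAN)
         shift ^= shift_mask;
       unsigned int mask = mode_mask << shift;   // mode_mask is 0xff or 0xffff

   e.g. the byte at address 0x1002 on a little-endian target yields
   shift == 16 and mask == 0x00ff0000 within the aligned word.  */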
15410 /* A subroutine of the various atomic expanders. For sub-word operands,
15411 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
15412
15413 static rtx
15414 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15415 {
15416 rtx x;
15417
15418 x = gen_reg_rtx (SImode);
15419 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15420 gen_rtx_NOT (SImode, mask),
15421 oldval)));
15422
15423 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15424
15425 return x;
15426 }
15427
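/* Editor's note: the combine above is simply

       result = (oldval & ~mask) | newval

   where NEWVAL has already been shifted into position and masked, so the
   bytes outside the subword keep their old contents.  */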
15428 /* A subroutine of the various atomic expanders. For sub-word operands,
15429 extract WIDE to NARROW via SHIFT. */
15430
15431 static void
15432 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15433 {
15434 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15435 wide, 1, OPTAB_LIB_WIDEN);
15436 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15437 }
15438
15439 /* Expand an atomic compare and swap operation. */
15440
15441 void
15442 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15443 {
15444 rtx boolval, retval, mem, oldval, newval, cond;
15445 rtx label1, label2, x, mask, shift;
15446 machine_mode mode, orig_mode;
15447 enum memmodel mod_s, mod_f;
15448 bool is_weak;
15449
15450 boolval = operands[0];
15451 retval = operands[1];
15452 mem = operands[2];
15453 oldval = operands[3];
15454 newval = operands[4];
15455 is_weak = (INTVAL (operands[5]) != 0);
15456 mod_s = memmodel_base (INTVAL (operands[6]));
15457 mod_f = memmodel_base (INTVAL (operands[7]));
15458 orig_mode = mode = GET_MODE (mem);
15459
15460 mask = shift = NULL_RTX;
15461 if (mode == QImode || mode == HImode)
15462 {
15463 /* Before power8, we didn't have access to lbarx/lharx, so we generate a
15464 lwarx plus shift/mask operations. With power8, we need to do the
15465 comparison in SImode, but the store is still done in QI/HImode. */
15466 oldval = convert_modes (SImode, mode, oldval, 1);
15467
15468 if (!TARGET_SYNC_HI_QI)
15469 {
15470 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15471
15472 /* Shift and mask OLDVAL into position within the word. */
15473 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
15474 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15475
15476 /* Shift and mask NEWVAL into position within the word. */
15477 newval = convert_modes (SImode, mode, newval, 1);
15478 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
15479 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15480 }
15481
15482 /* Prepare to adjust the return value. */
15483 retval = gen_reg_rtx (SImode);
15484 mode = SImode;
15485 }
15486 else if (reg_overlap_mentioned_p (retval, oldval))
15487 oldval = copy_to_reg (oldval);
15488
15489 if (mode != TImode && !reg_or_short_operand (oldval, mode))
15490 oldval = copy_to_mode_reg (mode, oldval);
15491
15492 if (reg_overlap_mentioned_p (retval, newval))
15493 newval = copy_to_reg (newval);
15494
15495 mem = rs6000_pre_atomic_barrier (mem, mod_s);
15496
15497 label1 = NULL_RTX;
15498 if (!is_weak)
15499 {
15500 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15501 emit_label (XEXP (label1, 0));
15502 }
15503 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15504
15505 emit_load_locked (mode, retval, mem);
15506
15507 x = retval;
15508 if (mask)
15509 x = expand_simple_binop (SImode, AND, retval, mask,
15510 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15511
15512 cond = gen_reg_rtx (CCmode);
15513 /* If we have TImode, synthesize a comparison. */
15514 if (mode != TImode)
15515 x = gen_rtx_COMPARE (CCmode, x, oldval);
15516 else
15517 {
15518 rtx xor1_result = gen_reg_rtx (DImode);
15519 rtx xor2_result = gen_reg_rtx (DImode);
15520 rtx or_result = gen_reg_rtx (DImode);
15521 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
15522 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
15523 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
15524 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
15525
15526 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
15527 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
15528 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
15529 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
15530 }
15531
15532 emit_insn (gen_rtx_SET (cond, x));
15533
15534 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15535 emit_unlikely_jump (x, label2);
15536
15537 x = newval;
15538 if (mask)
15539 x = rs6000_mask_atomic_subword (retval, newval, mask);
15540
15541 emit_store_conditional (orig_mode, cond, mem, x);
15542
15543 if (!is_weak)
15544 {
15545 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15546 emit_unlikely_jump (x, label1);
15547 }
15548
15549 if (!is_mm_relaxed (mod_f))
15550 emit_label (XEXP (label2, 0));
15551
15552 rs6000_post_atomic_barrier (mod_s);
15553
15554 if (is_mm_relaxed (mod_f))
15555 emit_label (XEXP (label2, 0));
15556
15557 if (shift)
15558 rs6000_finish_atomic_subword (operands[1], retval, shift);
15559 else if (mode != GET_MODE (operands[1]))
15560 convert_move (operands[1], retval, 1);
15561
15562 /* In all cases, CR0 contains EQ on success, and NE on failure. */
15563 x = gen_rtx_EQ (SImode, cond, const0_rtx);
15564 emit_insn (gen_rtx_SET (boolval, x));
15565 }
15566
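/* Editor's note: for an SImode strong compare-and-swap with seq_cst
   ordering, the expansion above boils down to the classic
   load-locked/store-conditional loop (register names hypothetical):

       sync                          ; hwsync pre-barrier
   .L1: lwarx   r9,0,rmem
       cmpw    cr0,r9,rold
       bne-    cr0,.L2
       stwcx.  rnew,0,rmem
       bne-    cr0,.L1
   .L2: isync                        ; post-barrier

   with CR0 left holding EQ on success and NE on failure, as the final
   SET of BOOLVAL records.  */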
15567 /* Expand an atomic exchange operation. */
15568
15569 void
15570 rs6000_expand_atomic_exchange (rtx operands[])
15571 {
15572 rtx retval, mem, val, cond;
15573 machine_mode mode;
15574 enum memmodel model;
15575 rtx label, x, mask, shift;
15576
15577 retval = operands[0];
15578 mem = operands[1];
15579 val = operands[2];
15580 model = memmodel_base (INTVAL (operands[3]));
15581 mode = GET_MODE (mem);
15582
15583 mask = shift = NULL_RTX;
15584 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
15585 {
15586 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15587
15588 /* Shift and mask VAL into position within the word. */
15589 val = convert_modes (SImode, mode, val, 1);
15590 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15591 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15592
15593 /* Prepare to adjust the return value. */
15594 retval = gen_reg_rtx (SImode);
15595 mode = SImode;
15596 }
15597
15598 mem = rs6000_pre_atomic_barrier (mem, model);
15599
15600 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15601 emit_label (XEXP (label, 0));
15602
15603 emit_load_locked (mode, retval, mem);
15604
15605 x = val;
15606 if (mask)
15607 x = rs6000_mask_atomic_subword (retval, val, mask);
15608
15609 cond = gen_reg_rtx (CCmode);
15610 emit_store_conditional (mode, cond, mem, x);
15611
15612 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15613 emit_unlikely_jump (x, label);
15614
15615 rs6000_post_atomic_barrier (model);
15616
15617 if (shift)
15618 rs6000_finish_atomic_subword (operands[0], retval, shift);
15619 }
15620
15621 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
15622 to perform. MEM is the memory on which to operate. VAL is the second
15623 operand of the binary operator. BEFORE and AFTER are optional locations to
15624 return the value of MEM either before or after the operation. MODEL_RTX
15625 is a CONST_INT containing the memory model to use. */
15626
15627 void
15628 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
15629 rtx orig_before, rtx orig_after, rtx model_rtx)
15630 {
15631 enum memmodel model = memmodel_base (INTVAL (model_rtx));
15632 machine_mode mode = GET_MODE (mem);
15633 machine_mode store_mode = mode;
15634 rtx label, x, cond, mask, shift;
15635 rtx before = orig_before, after = orig_after;
15636
15637 mask = shift = NULL_RTX;
15638 /* On power8, do the operation in SImode while keeping the QI/HImode
15639 access. On previous systems, do the operation on the containing word
15640 and shift/mask to get the proper byte or halfword. */
15641 if (mode == QImode || mode == HImode)
15642 {
15643 if (TARGET_SYNC_HI_QI)
15644 {
15645 val = convert_modes (SImode, mode, val, 1);
15646
15647 /* Prepare to adjust the return value. */
15648 before = gen_reg_rtx (SImode);
15649 if (after)
15650 after = gen_reg_rtx (SImode);
15651 mode = SImode;
15652 }
15653 else
15654 {
15655 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15656
15657 /* Shift and mask VAL into position within the word. */
15658 val = convert_modes (SImode, mode, val, 1);
15659 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15660 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15661
15662 switch (code)
15663 {
15664 case IOR:
15665 case XOR:
15666 /* We've already zero-extended VAL. That is sufficient to
15667 make certain that it does not affect other bits. */
15668 mask = NULL;
15669 break;
15670
15671 case AND:
15672 /* If we make certain that all of the other bits in VAL are
15673 set, that will be sufficient to not affect other bits. */
15674 x = gen_rtx_NOT (SImode, mask);
15675 x = gen_rtx_IOR (SImode, x, val);
15676 emit_insn (gen_rtx_SET (val, x));
15677 mask = NULL;
15678 break;
15679
15680 case NOT:
15681 case PLUS:
15682 case MINUS:
15683 /* These will all affect bits outside the field and need
15684 adjustment via MASK within the loop. */
15685 break;
15686
15687 default:
15688 gcc_unreachable ();
15689 }
15690
15691 /* Prepare to adjust the return value. */
15692 before = gen_reg_rtx (SImode);
15693 if (after)
15694 after = gen_reg_rtx (SImode);
15695 store_mode = mode = SImode;
15696 }
15697 }
15698
15699 mem = rs6000_pre_atomic_barrier (mem, model);
15700
15701 label = gen_label_rtx ();
15702 emit_label (label);
15703 label = gen_rtx_LABEL_REF (VOIDmode, label);
15704
15705 if (before == NULL_RTX)
15706 before = gen_reg_rtx (mode);
15707
15708 emit_load_locked (mode, before, mem);
15709
15710 if (code == NOT)
15711 {
15712 x = expand_simple_binop (mode, AND, before, val,
15713 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15714 after = expand_simple_unop (mode, NOT, x, after, 1);
15715 }
15716 else
15717 {
15718 after = expand_simple_binop (mode, code, before, val,
15719 after, 1, OPTAB_LIB_WIDEN);
15720 }
15721
15722 x = after;
15723 if (mask)
15724 {
15725 x = expand_simple_binop (SImode, AND, after, mask,
15726 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15727 x = rs6000_mask_atomic_subword (before, x, mask);
15728 }
15729 else if (store_mode != mode)
15730 x = convert_modes (store_mode, mode, x, 1);
15731
15732 cond = gen_reg_rtx (CCmode);
15733 emit_store_conditional (store_mode, cond, mem, x);
15734
15735 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15736 emit_unlikely_jump (x, label);
15737
15738 rs6000_post_atomic_barrier (model);
15739
15740 if (shift)
15741 {
15742 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
15743 then do the calculations in a SImode register. */
15744 if (orig_before)
15745 rs6000_finish_atomic_subword (orig_before, before, shift);
15746 if (orig_after)
15747 rs6000_finish_atomic_subword (orig_after, after, shift);
15748 }
15749 else if (store_mode != mode)
15750 {
15751 /* QImode/HImode on machines with lbarx/lharx where we do the native
15752 operation and then do the calculations in a SImode register. */
15753 if (orig_before)
15754 convert_move (orig_before, before, 1);
15755 if (orig_after)
15756 convert_move (orig_after, after, 1);
15757 }
15758 else if (orig_after && after != orig_after)
15759 emit_move_insn (orig_after, after);
15760 }
15761
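/* Editor's note: a representative expansion from the routine above, for an
   SImode relaxed atomic fetch-and-add (no barriers, register names
   hypothetical):

   .L1: lwarx   r9,0,rmem            ; BEFORE value
       add     r10,r9,rval          ; AFTER value
       stwcx.  r10,0,rmem
       bne-    cr0,.L1

   For QImode/HImode without lbarx/lharx, the same loop runs on the
   containing aligned word, using the shift/mask plumbing shown earlier.  */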
15762 /* Emit instructions to move SRC to DST. Called by splitters for
15763 multi-register moves. It will emit at most one instruction for
15764 each register that is accessed; that is, it won't emit li/lis pairs
15765 (or equivalent for 64-bit code). One of SRC or DST must be a hard
15766 register. */
15767
15768 void
15769 rs6000_split_multireg_move (rtx dst, rtx src)
15770 {
15771 /* The register number of the first register being moved. */
15772 int reg;
15773 /* The mode that is to be moved. */
15774 machine_mode mode;
15775 /* The mode that the move is being done in, and its size. */
15776 machine_mode reg_mode;
15777 int reg_mode_size;
15778 /* The number of registers that will be moved. */
15779 int nregs;
15780
15781 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
15782 mode = GET_MODE (dst);
15783 nregs = hard_regno_nregs (reg, mode);
15784 if (FP_REGNO_P (reg))
15785 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
15786 (TARGET_HARD_FLOAT ? DFmode : SFmode);
15787 else if (ALTIVEC_REGNO_P (reg))
15788 reg_mode = V16QImode;
15789 else
15790 reg_mode = word_mode;
15791 reg_mode_size = GET_MODE_SIZE (reg_mode);
15792
15793 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
15794
15795 /* TDmode residing in FP registers is special, since the ISA requires that
15796 the lower-numbered word of a register pair is always the most significant
15797 word, even in little-endian mode. This does not match the usual subreg
15798 semantics, so we cannot use simplify_gen_subreg in those cases. Access
15799 the appropriate constituent registers "by hand" in little-endian mode.
15800
15801 Note we do not need to check for destructive overlap here since TDmode
15802 can only reside in even/odd register pairs. */
15803 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
15804 {
15805 rtx p_src, p_dst;
15806 int i;
15807
15808 for (i = 0; i < nregs; i++)
15809 {
15810 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
15811 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
15812 else
15813 p_src = simplify_gen_subreg (reg_mode, src, mode,
15814 i * reg_mode_size);
15815
15816 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
15817 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
15818 else
15819 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
15820 i * reg_mode_size);
15821
15822 emit_insn (gen_rtx_SET (p_dst, p_src));
15823 }
15824
15825 return;
15826 }
15827
15828 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
15829 {
15830 /* Move register range backwards, if we might have destructive
15831 overlap. */
15832 int i;
15833 for (i = nregs - 1; i >= 0; i--)
15834 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15835 i * reg_mode_size),
15836 simplify_gen_subreg (reg_mode, src, mode,
15837 i * reg_mode_size)));
15838 }
15839 else
15840 {
15841 int i;
15842 int j = -1;
15843 bool used_update = false;
15844 rtx restore_basereg = NULL_RTX;
15845
15846 if (MEM_P (src) && INT_REGNO_P (reg))
15847 {
15848 rtx breg;
15849
15850 if (GET_CODE (XEXP (src, 0)) == PRE_INC
15851 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
15852 {
15853 rtx delta_rtx;
15854 breg = XEXP (XEXP (src, 0), 0);
15855 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
15856 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
15857 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
15858 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15859 src = replace_equiv_address (src, breg);
15860 }
15861 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
15862 {
15863 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
15864 {
15865 rtx basereg = XEXP (XEXP (src, 0), 0);
15866 if (TARGET_UPDATE)
15867 {
15868 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
15869 emit_insn (gen_rtx_SET (ndst,
15870 gen_rtx_MEM (reg_mode,
15871 XEXP (src, 0))));
15872 used_update = true;
15873 }
15874 else
15875 emit_insn (gen_rtx_SET (basereg,
15876 XEXP (XEXP (src, 0), 1)));
15877 src = replace_equiv_address (src, basereg);
15878 }
15879 else
15880 {
15881 rtx basereg = gen_rtx_REG (Pmode, reg);
15882 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
15883 src = replace_equiv_address (src, basereg);
15884 }
15885 }
15886
15887 breg = XEXP (src, 0);
15888 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
15889 breg = XEXP (breg, 0);
15890
15891 /* If the base register we are using to address memory is
15892 also a destination reg, then change that register last. */
15893 if (REG_P (breg)
15894 && REGNO (breg) >= REGNO (dst)
15895 && REGNO (breg) < REGNO (dst) + nregs)
15896 j = REGNO (breg) - REGNO (dst);
15897 }
15898 else if (MEM_P (dst) && INT_REGNO_P (reg))
15899 {
15900 rtx breg;
15901
15902 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
15903 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
15904 {
15905 rtx delta_rtx;
15906 breg = XEXP (XEXP (dst, 0), 0);
15907 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
15908 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
15909 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
15910
15911 /* We have to update the breg before doing the store.
15912 Use store with update, if available. */
15913
15914 if (TARGET_UPDATE)
15915 {
15916 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15917 emit_insn (TARGET_32BIT
15918 ? (TARGET_POWERPC64
15919 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
15920 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
15921 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
15922 used_update = true;
15923 }
15924 else
15925 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15926 dst = replace_equiv_address (dst, breg);
15927 }
15928 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
15929 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
15930 {
15931 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
15932 {
15933 rtx basereg = XEXP (XEXP (dst, 0), 0);
15934 if (TARGET_UPDATE)
15935 {
15936 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15937 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
15938 XEXP (dst, 0)),
15939 nsrc));
15940 used_update = true;
15941 }
15942 else
15943 emit_insn (gen_rtx_SET (basereg,
15944 XEXP (XEXP (dst, 0), 1)));
15945 dst = replace_equiv_address (dst, basereg);
15946 }
15947 else
15948 {
15949 rtx basereg = XEXP (XEXP (dst, 0), 0);
15950 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
15951 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
15952 && REG_P (basereg)
15953 && REG_P (offsetreg)
15954 && REGNO (basereg) != REGNO (offsetreg));
15955 if (REGNO (basereg) == 0)
15956 {
15957 rtx tmp = offsetreg;
15958 offsetreg = basereg;
15959 basereg = tmp;
15960 }
15961 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
15962 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
15963 dst = replace_equiv_address (dst, basereg);
15964 }
15965 }
15966 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
15967 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
15968 }
15969
15970 for (i = 0; i < nregs; i++)
15971 {
15972 /* Calculate index to next subword. */
15973 ++j;
15974 if (j == nregs)
15975 j = 0;
15976
15977 /* If the compiler already emitted the move of the first word
15978 via a store with update, there is nothing left to do. */
15979 if (j == 0 && used_update)
15980 continue;
15981
15982 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15983 j * reg_mode_size),
15984 simplify_gen_subreg (reg_mode, src, mode,
15985 j * reg_mode_size)));
15986 }
15987 if (restore_basereg != NULL_RTX)
15988 emit_insn (restore_basereg);
15989 }
15990 }
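
/* Illustration (not from the sources): splitting a TImode load whose
   address register is also one of the destination registers.  With
   destination pair r9/r10 and a source mem addressed by r9, the word
   that overwrites r9 is moved last, so the loop above emits roughly

       ld 10,8(9)
       ld 9,0(9)

   instead of clobbering the base register before the second load.
   Register numbers and the big-endian word order are illustrative.  */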
15991
15992 static GTY(()) alias_set_type TOC_alias_set = -1;
15993
15994 alias_set_type
15995 get_TOC_alias_set (void)
15996 {
15997 if (TOC_alias_set == -1)
15998 TOC_alias_set = new_alias_set ();
15999 return TOC_alias_set;
16000 }
16001
16002 /* The mode the ABI uses for a word. This is not the same as word_mode
16003 for -m32 -mpowerpc64. This is used to implement various target hooks. */
16004
16005 static scalar_int_mode
16006 rs6000_abi_word_mode (void)
16007 {
16008 return TARGET_32BIT ? SImode : DImode;
16009 }
16010
16011 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16012 static char *
16013 rs6000_offload_options (void)
16014 {
16015 if (TARGET_64BIT)
16016 return xstrdup ("-foffload-abi=lp64");
16017 else
16018 return xstrdup ("-foffload-abi=ilp32");
16019 }
16020
16021 \f
16022 /* A quick summary of the various types of 'constant-pool tables'
16023 under PowerPC:
16024
16025 Target      Flags               Name             One table per
16026 AIX         (none)              AIX TOC          object file
16027 AIX         -mfull-toc          AIX TOC          object file
16028 AIX         -mminimal-toc       AIX minimal TOC  translation unit
16029 SVR4/EABI   (none)              SVR4 SDATA       object file
16030 SVR4/EABI   -fpic               SVR4 pic         object file
16031 SVR4/EABI   -fPIC               SVR4 PIC         translation unit
16032 SVR4/EABI   -mrelocatable       EABI TOC         function
16033 SVR4/EABI   -maix               AIX TOC          object file
16034 SVR4/EABI   -maix -mminimal-toc
16035                                 AIX minimal TOC  translation unit
16036
16037 Name             Reg.  Set by   entries  contains:
16038                        made by  addrs?   fp?      sum?
16039
16040 AIX TOC          2     crt0     as       Y        option   option
16041 AIX minimal TOC  30    prolog   gcc      Y        Y        option
16042 SVR4 SDATA       13    crt0     gcc      N        Y        N
16043 SVR4 pic         30    prolog   ld       Y        not yet  N
16044 SVR4 PIC         30    prolog   gcc      Y        option   option
16045 EABI TOC         30    prolog   gcc      Y        option   option
16046
16047 */
16048
16049 /* Hash functions for the hash table. */
16050
16051 static unsigned
16052 rs6000_hash_constant (rtx k)
16053 {
16054 enum rtx_code code = GET_CODE (k);
16055 machine_mode mode = GET_MODE (k);
16056 unsigned result = (code << 3) ^ mode;
16057 const char *format;
16058 int flen, fidx;
16059
16060 format = GET_RTX_FORMAT (code);
16061 flen = strlen (format);
16062 fidx = 0;
16063
16064 switch (code)
16065 {
16066 case LABEL_REF:
16067 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16068
16069 case CONST_WIDE_INT:
16070 {
16071 int i;
16072 flen = CONST_WIDE_INT_NUNITS (k);
16073 for (i = 0; i < flen; i++)
16074 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16075 return result;
16076 }
16077
16078 case CONST_DOUBLE:
16079 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16080
16081 case CODE_LABEL:
16082 fidx = 3;
16083 break;
16084
16085 default:
16086 break;
16087 }
16088
16089 for (; fidx < flen; fidx++)
16090 switch (format[fidx])
16091 {
16092 case 's':
16093 {
16094 unsigned i, len;
16095 const char *str = XSTR (k, fidx);
16096 len = strlen (str);
16097 result = result * 613 + len;
16098 for (i = 0; i < len; i++)
16099 result = result * 613 + (unsigned) str[i];
16100 break;
16101 }
16102 case 'u':
16103 case 'e':
16104 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16105 break;
16106 case 'i':
16107 case 'n':
16108 result = result * 613 + (unsigned) XINT (k, fidx);
16109 break;
16110 case 'w':
16111 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16112 result = result * 613 + (unsigned) XWINT (k, fidx);
16113 else
16114 {
16115 size_t i;
16116 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16117 result = result * 613 + (unsigned) (XWINT (k, fidx)
16118 >> CHAR_BIT * i);
16119 }
16120 break;
16121 case '0':
16122 break;
16123 default:
16124 gcc_unreachable ();
16125 }
16126
16127 return result;
16128 }
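
/* Illustrative sketch (not part of the compiler): the scheme above is
   a plain multiplicative hash, folding each operand field into the
   running result with the odd constants 613 and 1231.  A stand-alone
   analogue for the string-field case would be:

     unsigned
     hash_string_field (unsigned result, const char *str)
     {
       size_t len = strlen (str);
       result = result * 613 + (unsigned) len;
       for (size_t i = 0; i < len; i++)
         result = result * 613 + (unsigned) str[i];
       return result;
     }

   Collisions are harmless here, since toc_hasher::equal falls back to
   rtx_equal_p for an exact comparison.  */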
16129
16130 hashval_t
16131 toc_hasher::hash (toc_hash_struct *thc)
16132 {
16133 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16134 }
16135
16136 /* Compare H1 and H2 for equivalence. */
16137
16138 bool
16139 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16140 {
16141 rtx r1 = h1->key;
16142 rtx r2 = h2->key;
16143
16144 if (h1->key_mode != h2->key_mode)
16145 return 0;
16146
16147 return rtx_equal_p (r1, r2);
16148 }
16149
16150 /* These are the names given by the C++ front-end to vtables, and
16151 vtable-like objects. Ideally, this logic should not be here;
16152 instead, there should be some programmatic way of inquiring as
16153 to whether or not an object is a vtable. */
16154
16155 #define VTABLE_NAME_P(NAME) \
16156 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
16157 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
16158 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
16159 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
16160 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
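
/* For example, for a class Foo the Itanium C++ ABI mangles the vtable
   as "_ZTV3Foo", the VTT as "_ZTT3Foo", the typeinfo as "_ZTI3Foo",
   and construction vtables with a "_ZTC" prefix, all of which
   VTABLE_NAME_P accepts; "_vt." is the old GCC 2.x vtable prefix.
   The class name is, of course, only illustrative.  */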
16161
16162 #ifdef NO_DOLLAR_IN_LABEL
16163 /* Return a GGC-allocated character string translating dollar signs in
16164 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16165
16166 const char *
16167 rs6000_xcoff_strip_dollar (const char *name)
16168 {
16169 char *strip, *p;
16170 const char *q;
16171 size_t len;
16172
16173 q = (const char *) strchr (name, '$');
16174
16175 if (q == 0 || q == name)
16176 return name;
16177
16178 len = strlen (name);
16179 strip = XALLOCAVEC (char, len + 1);
16180 strcpy (strip, name);
16181 p = strip + (q - name);
16182 while (p)
16183 {
16184 *p = '_';
16185 p = strchr (p + 1, '$');
16186 }
16187
16188 return ggc_alloc_string (strip, len);
16189 }
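
/* For instance, a symbol named "foo$bar$baz" is returned as
   "foo_bar_baz", while a name containing no '$' (or one whose first
   character is '$') is returned unchanged.  Example names are
   illustrative.  */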
16190 #endif
16191
16192 void
16193 rs6000_output_symbol_ref (FILE *file, rtx x)
16194 {
16195 const char *name = XSTR (x, 0);
16196
16197 /* Currently C++ toc references to vtables can be emitted before it
16198 is decided whether the vtable is public or private. If this is
16199 the case, then the linker will eventually complain that there is
16200 a reference to an unknown section. Thus, for vtables only,
16201 we emit the TOC reference to reference the identifier and not the
16202 symbol. */
16203 if (VTABLE_NAME_P (name))
16204 {
16205 RS6000_OUTPUT_BASENAME (file, name);
16206 }
16207 else
16208 assemble_name (file, name);
16209 }
16210
16211 /* Output a TOC entry. We derive the entry name from what is being
16212 written. */
16213
16214 void
16215 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16216 {
16217 char buf[256];
16218 const char *name = buf;
16219 rtx base = x;
16220 HOST_WIDE_INT offset = 0;
16221
16222 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16223
16224 /* When the linker won't eliminate them, don't output duplicate
16225 TOC entries (this happens on AIX if there is any kind of TOC,
16226 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16227 CODE_LABELs. */
16228 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16229 {
16230 struct toc_hash_struct *h;
16231
16232 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16233 time because GGC is not initialized at that point. */
16234 if (toc_hash_table == NULL)
16235 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16236
16237 h = ggc_alloc<toc_hash_struct> ();
16238 h->key = x;
16239 h->key_mode = mode;
16240 h->labelno = labelno;
16241
16242 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16243 if (*found == NULL)
16244 *found = h;
16245 else /* This is indeed a duplicate.
16246 Set this label equal to that label. */
16247 {
16248 fputs ("\t.set ", file);
16249 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16250 fprintf (file, "%d,", labelno);
16251 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16252 fprintf (file, "%d\n", ((*found)->labelno));
16253
16254 #ifdef HAVE_AS_TLS
16255 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16256 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16257 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16258 {
16259 fputs ("\t.set ", file);
16260 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16261 fprintf (file, "%d,", labelno);
16262 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16263 fprintf (file, "%d\n", ((*found)->labelno));
16264 }
16265 #endif
16266 return;
16267 }
16268 }
16269
16270 /* If we're going to put a double constant in the TOC, make sure it's
16271 aligned properly when strict alignment is on. */
16272 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16273 && STRICT_ALIGNMENT
16274 && GET_MODE_BITSIZE (mode) >= 64
16275 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
16276 ASM_OUTPUT_ALIGN (file, 3);
16278
16279 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16280
16281 /* Handle FP constants specially. Note that if we have a minimal
16282 TOC, things we put here aren't actually in the TOC, so we can allow
16283 FP constants. */
16284 if (CONST_DOUBLE_P (x)
16285 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16286 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16287 {
16288 long k[4];
16289
16290 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16291 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16292 else
16293 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16294
16295 if (TARGET_64BIT)
16296 {
16297 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16298 fputs (DOUBLE_INT_ASM_OP, file);
16299 else
16300 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16301 k[0] & 0xffffffff, k[1] & 0xffffffff,
16302 k[2] & 0xffffffff, k[3] & 0xffffffff);
16303 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16304 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16305 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16306 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16307 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16308 return;
16309 }
16310 else
16311 {
16312 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16313 fputs ("\t.long ", file);
16314 else
16315 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16316 k[0] & 0xffffffff, k[1] & 0xffffffff,
16317 k[2] & 0xffffffff, k[3] & 0xffffffff);
16318 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
16319 k[0] & 0xffffffff, k[1] & 0xffffffff,
16320 k[2] & 0xffffffff, k[3] & 0xffffffff);
16321 return;
16322 }
16323 }
16324 else if (CONST_DOUBLE_P (x)
16325 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
16326 {
16327 long k[2];
16328
16329 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16330 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
16331 else
16332 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16333
16334 if (TARGET_64BIT)
16335 {
16336 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16337 fputs (DOUBLE_INT_ASM_OP, file);
16338 else
16339 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16340 k[0] & 0xffffffff, k[1] & 0xffffffff);
16341 fprintf (file, "0x%lx%08lx\n",
16342 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16343 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
16344 return;
16345 }
16346 else
16347 {
16348 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16349 fputs ("\t.long ", file);
16350 else
16351 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16352 k[0] & 0xffffffff, k[1] & 0xffffffff);
16353 fprintf (file, "0x%lx,0x%lx\n",
16354 k[0] & 0xffffffff, k[1] & 0xffffffff);
16355 return;
16356 }
16357 }
16358 else if (CONST_DOUBLE_P (x)
16359 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
16360 {
16361 long l;
16362
16363 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16364 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
16365 else
16366 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
16367
16368 if (TARGET_64BIT)
16369 {
16370 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16371 fputs (DOUBLE_INT_ASM_OP, file);
16372 else
16373 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16374 if (WORDS_BIG_ENDIAN)
16375 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
16376 else
16377 fprintf (file, "0x%lx\n", l & 0xffffffff);
16378 return;
16379 }
16380 else
16381 {
16382 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16383 fputs ("\t.long ", file);
16384 else
16385 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16386 fprintf (file, "0x%lx\n", l & 0xffffffff);
16387 return;
16388 }
16389 }
16390 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
16391 {
16392 unsigned HOST_WIDE_INT low;
16393 HOST_WIDE_INT high;
16394
16395 low = INTVAL (x) & 0xffffffff;
16396 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
16397
16398 /* TOC entries are always Pmode-sized, so when big-endian
16399 smaller integer constants in the TOC need to be padded.
16400 (This is still a win over putting the constants in
16401 a separate constant pool, because then we'd have
16402 to have both a TOC entry _and_ the actual constant.)
16403
16404 For a 32-bit target, CONST_INT values are loaded and shifted
16405 entirely within `low' and can be stored in one TOC entry. */
16406
16407 /* It would be easy to make this work, but it doesn't now. */
16408 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
16409
16410 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
16411 {
16412 low |= high << 32;
16413 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
16414 high = (HOST_WIDE_INT) low >> 32;
16415 low &= 0xffffffff;
16416 }
16417
16418 if (TARGET_64BIT)
16419 {
16420 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16421 fputs (DOUBLE_INT_ASM_OP, file);
16422 else
16423 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16424 (long) high & 0xffffffff, (long) low & 0xffffffff);
16425 fprintf (file, "0x%lx%08lx\n",
16426 (long) high & 0xffffffff, (long) low & 0xffffffff);
16427 return;
16428 }
16429 else
16430 {
16431 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
16432 {
16433 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16434 fputs ("\t.long ", file);
16435 else
16436 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16437 (long) high & 0xffffffff, (long) low & 0xffffffff);
16438 fprintf (file, "0x%lx,0x%lx\n",
16439 (long) high & 0xffffffff, (long) low & 0xffffffff);
16440 }
16441 else
16442 {
16443 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16444 fputs ("\t.long ", file);
16445 else
16446 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
16447 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
16448 }
16449 return;
16450 }
16451 }
16452
16453 if (GET_CODE (x) == CONST)
16454 {
16455 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
16456 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
16457
16458 base = XEXP (XEXP (x, 0), 0);
16459 offset = INTVAL (XEXP (XEXP (x, 0), 1));
16460 }
16461
16462 switch (GET_CODE (base))
16463 {
16464 case SYMBOL_REF:
16465 name = XSTR (base, 0);
16466 break;
16467
16468 case LABEL_REF:
16469 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
16470 CODE_LABEL_NUMBER (XEXP (base, 0)));
16471 break;
16472
16473 case CODE_LABEL:
16474 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
16475 break;
16476
16477 default:
16478 gcc_unreachable ();
16479 }
16480
16481 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16482 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
16483 else
16484 {
16485 fputs ("\t.tc ", file);
16486 RS6000_OUTPUT_BASENAME (file, name);
16487
16488 if (offset < 0)
16489 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
16490 else if (offset)
16491 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
16492
16493 /* Mark large TOC symbols on AIX with [TE] so they are mapped
16494 after other TOC symbols, reducing overflow of small TOC access
16495 to [TC] symbols. */
16496 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
16497 ? "[TE]," : "[TC],", file);
16498 }
16499
16500 /* Currently C++ toc references to vtables can be emitted before it
16501 is decided whether the vtable is public or private. If this is
16502 the case, then the linker will eventually complain that there is
16503 a TOC reference to an unknown section. Thus, for vtables only,
16504 we emit the TOC reference to reference the symbol and not the
16505 section. */
16506 if (VTABLE_NAME_P (name))
16507 {
16508 RS6000_OUTPUT_BASENAME (file, name);
16509 if (offset < 0)
16510 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
16511 else if (offset > 0)
16512 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
16513 }
16514 else
16515 output_addr_const (file, x);
16516
16517 #if HAVE_AS_TLS
16518 if (TARGET_XCOFF && SYMBOL_REF_P (base))
16519 {
16520 switch (SYMBOL_REF_TLS_MODEL (base))
16521 {
16522 case 0:
16523 break;
16524 case TLS_MODEL_LOCAL_EXEC:
16525 fputs ("@le", file);
16526 break;
16527 case TLS_MODEL_INITIAL_EXEC:
16528 fputs ("@ie", file);
16529 break;
16530 /* Use global-dynamic for local-dynamic. */
16531 case TLS_MODEL_GLOBAL_DYNAMIC:
16532 case TLS_MODEL_LOCAL_DYNAMIC:
16533 putc ('\n', file);
16534 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
16535 fputs ("\t.tc .", file);
16536 RS6000_OUTPUT_BASENAME (file, name);
16537 fputs ("[TC],", file);
16538 output_addr_const (file, x);
16539 fputs ("@m", file);
16540 break;
16541 default:
16542 gcc_unreachable ();
16543 }
16544 }
16545 #endif
16546
16547 putc ('\n', file);
16548 }
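
/* As a concrete illustration (the exact spelling depends on target
   and options), the DFmode constant 1.0 on a 64-bit AIX-style TOC
   would come out roughly as

       LC..42:
	       .tc FD_3ff00000_0[TC],0x3ff0000000000000

   whereas with -mminimal-toc or on ELF the same value is emitted as a
   bare 64-bit integer directive after the internal label.  The label
   number 42 is invented for the example.  */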
16549 \f
16550 /* Output an assembler pseudo-op to write an ASCII string of N characters
16551 starting at P to FILE.
16552
16553 On the RS/6000, we have to do this using the .byte operation and
16554 write out special characters outside the quoted string.
16555 Also, the assembler is broken; very long strings are truncated,
16556 so we must artificially break them up early. */
16557
16558 void
16559 output_ascii (FILE *file, const char *p, int n)
16560 {
16561 char c;
16562 int i, count_string;
16563 const char *for_string = "\t.byte \"";
16564 const char *for_decimal = "\t.byte ";
16565 const char *to_close = NULL;
16566
16567 count_string = 0;
16568 for (i = 0; i < n; i++)
16569 {
16570 c = *p++;
16571 if (c >= ' ' && c < 0177)
16572 {
16573 if (for_string)
16574 fputs (for_string, file);
16575 putc (c, file);
16576
16577 /* Write two quotes to get one. */
16578 if (c == '"')
16579 {
16580 putc (c, file);
16581 ++count_string;
16582 }
16583
16584 for_string = NULL;
16585 for_decimal = "\"\n\t.byte ";
16586 to_close = "\"\n";
16587 ++count_string;
16588
16589 if (count_string >= 512)
16590 {
16591 fputs (to_close, file);
16592
16593 for_string = "\t.byte \"";
16594 for_decimal = "\t.byte ";
16595 to_close = NULL;
16596 count_string = 0;
16597 }
16598 }
16599 else
16600 {
16601 if (for_decimal)
16602 fputs (for_decimal, file);
16603 fprintf (file, "%d", c);
16604
16605 for_string = "\n\t.byte \"";
16606 for_decimal = ", ";
16607 to_close = "\n";
16608 count_string = 0;
16609 }
16610 }
16611
16612 /* Now close the string if we have written one. Then end the line. */
16613 if (to_close)
16614 fputs (to_close, file);
16615 }
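
/* For example, the three input bytes "Hi\n" are emitted as

       .byte "Hi"
       .byte 10

   printable characters inside a quoted run, the newline as a decimal
   byte outside it.  (Illustrative; the actual line breaking also
   depends on the 512-character chunking above.)  */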
16616 \f
16617 /* Generate a unique section name for FILENAME for a section type
16618 represented by SECTION_DESC. Output goes into BUF.
16619
16620 SECTION_DESC can be any string, as long as it is different for each
16621 possible section type.
16622
16623 We name the section in the same manner as xlc. The name begins with an
16624 underscore followed by the filename (after stripping any leading directory
16625 names) with the last period replaced by the string SECTION_DESC. If
16626 FILENAME does not contain a period, SECTION_DESC is appended to the end of
16627 the name. */
16628
16629 void
16630 rs6000_gen_section_name (char **buf, const char *filename,
16631 const char *section_desc)
16632 {
16633 const char *q, *after_last_slash, *last_period = 0;
16634 char *p;
16635 int len;
16636
16637 after_last_slash = filename;
16638 for (q = filename; *q; q++)
16639 {
16640 if (*q == '/')
16641 after_last_slash = q + 1;
16642 else if (*q == '.')
16643 last_period = q;
16644 }
16645
16646 len = strlen (after_last_slash) + strlen (section_desc) + 2;
16647 *buf = (char *) xmalloc (len);
16648
16649 p = *buf;
16650 *p++ = '_';
16651
16652 for (q = after_last_slash; *q; q++)
16653 {
16654 if (q == last_period)
16655 {
16656 strcpy (p, section_desc);
16657 p += strlen (section_desc);
16658 break;
16659 }
16660
16661 else if (ISALNUM (*q))
16662 *p++ = *q;
16663 }
16664
16665 if (last_period == 0)
16666 strcpy (p, section_desc);
16667 else
16668 *p = '\0';
16669 }
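
/* For example, FILENAME "src/foo.c" with SECTION_DESC "ro_" produces
   "_fooro_": the directory prefix is dropped, only alphanumeric
   characters are kept, and the last period plus everything after it
   is replaced by SECTION_DESC.  The inputs are illustrative.  */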
16670 \f
16671 /* Emit profile function. */
16672
16673 void
16674 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
16675 {
16676 /* Non-standard profiling for kernels, which just saves LR then calls
16677 _mcount without worrying about arg saves. The idea is to change
16678 the function prologue as little as possible as it isn't easy to
16679 account for arg save/restore code added just for _mcount. */
16680 if (TARGET_PROFILE_KERNEL)
16681 return;
16682
16683 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
16684 {
16685 #ifndef NO_PROFILE_COUNTERS
16686 # define NO_PROFILE_COUNTERS 0
16687 #endif
16688 if (NO_PROFILE_COUNTERS)
16689 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16690 LCT_NORMAL, VOIDmode);
16691 else
16692 {
16693 char buf[30];
16694 const char *label_name;
16695 rtx fun;
16696
16697 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16698 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
16699 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
16700
16701 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16702 LCT_NORMAL, VOIDmode, fun, Pmode);
16703 }
16704 }
16705 else if (DEFAULT_ABI == ABI_DARWIN)
16706 {
16707 const char *mcount_name = RS6000_MCOUNT;
16708 int caller_addr_regno = LR_REGNO;
16709
16710 /* Be conservative and always set this, at least for now. */
16711 crtl->uses_pic_offset_table = 1;
16712
16713 #if TARGET_MACHO
16714 /* For PIC code, set up a stub and collect the caller's address
16715 from r0, which is where the prologue puts it. */
16716 if (MACHOPIC_INDIRECT
16717 && crtl->uses_pic_offset_table)
16718 caller_addr_regno = 0;
16719 #endif
16720 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
16721 LCT_NORMAL, VOIDmode,
16722 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
16723 }
16724 }
16725
16726 /* Write function profiler code. */
16727
16728 void
16729 output_function_profiler (FILE *file, int labelno)
16730 {
16731 char buf[100];
16732
16733 switch (DEFAULT_ABI)
16734 {
16735 default:
16736 gcc_unreachable ();
16737
16738 case ABI_V4:
16739 if (!TARGET_32BIT)
16740 {
16741 warning (0, "no profiling of 64-bit code for this ABI");
16742 return;
16743 }
16744 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16745 fprintf (file, "\tmflr %s\n", reg_names[0]);
16746 if (NO_PROFILE_COUNTERS)
16747 {
16748 asm_fprintf (file, "\tstw %s,4(%s)\n",
16749 reg_names[0], reg_names[1]);
16750 }
16751 else if (TARGET_SECURE_PLT && flag_pic)
16752 {
16753 if (TARGET_LINK_STACK)
16754 {
16755 char name[32];
16756 get_ppc476_thunk_name (name);
16757 asm_fprintf (file, "\tbl %s\n", name);
16758 }
16759 else
16760 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
16761 asm_fprintf (file, "\tstw %s,4(%s)\n",
16762 reg_names[0], reg_names[1]);
16763 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16764 asm_fprintf (file, "\taddis %s,%s,",
16765 reg_names[12], reg_names[12]);
16766 assemble_name (file, buf);
16767 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
16768 assemble_name (file, buf);
16769 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
16770 }
16771 else if (flag_pic == 1)
16772 {
16773 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
16774 asm_fprintf (file, "\tstw %s,4(%s)\n",
16775 reg_names[0], reg_names[1]);
16776 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16777 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
16778 assemble_name (file, buf);
16779 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
16780 }
16781 else if (flag_pic > 1)
16782 {
16783 asm_fprintf (file, "\tstw %s,4(%s)\n",
16784 reg_names[0], reg_names[1]);
16785 /* Now, we need to get the address of the label. */
16786 if (TARGET_LINK_STACK)
16787 {
16788 char name[32];
16789 get_ppc476_thunk_name (name);
16790 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
16791 assemble_name (file, buf);
16792 fputs ("-.\n1:", file);
16793 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16794 asm_fprintf (file, "\taddi %s,%s,4\n",
16795 reg_names[11], reg_names[11]);
16796 }
16797 else
16798 {
16799 fputs ("\tbcl 20,31,1f\n\t.long ", file);
16800 assemble_name (file, buf);
16801 fputs ("-.\n1:", file);
16802 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16803 }
16804 asm_fprintf (file, "\tlwz %s,0(%s)\n",
16805 reg_names[0], reg_names[11]);
16806 asm_fprintf (file, "\tadd %s,%s,%s\n",
16807 reg_names[0], reg_names[0], reg_names[11]);
16808 }
16809 else
16810 {
16811 asm_fprintf (file, "\tlis %s,", reg_names[12]);
16812 assemble_name (file, buf);
16813 fputs ("@ha\n", file);
16814 asm_fprintf (file, "\tstw %s,4(%s)\n",
16815 reg_names[0], reg_names[1]);
16816 asm_fprintf (file, "\tla %s,", reg_names[0]);
16817 assemble_name (file, buf);
16818 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
16819 }
16820
16821 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
16822 fprintf (file, "\tbl %s%s\n",
16823 RS6000_MCOUNT, flag_pic ? "@plt" : "");
16824 break;
16825
16826 case ABI_AIX:
16827 case ABI_ELFv2:
16828 case ABI_DARWIN:
16829 /* Don't do anything, done in output_profile_hook (). */
16830 break;
16831 }
16832 }
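
/* For the 32-bit SVR4 ABI without PIC, the code above emits roughly
   (label number invented for the example):

       mflr 0
       lis 12,.LP3@ha
       stw 0,4(1)
       la 0,.LP3@l(12)
       bl _mcount

   i.e. the link register is saved to its slot at 4(r1) and r0 is
   loaded with the address of the per-function counter word before the
   call.  The exact _mcount symbol comes from RS6000_MCOUNT and varies
   between targets.  */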
16833
16834 \f
16835
16836 /* The following variable value is the last issued insn. */
16837
16838 static rtx_insn *last_scheduled_insn;
16839
16840 /* The following variable helps to balance issuing of load and
16841 store instructions.  */
16842
16843 static int load_store_pendulum;
16844
16845 /* The following variable helps pair divide insns during scheduling. */
16846 static int divide_cnt;
16847 /* The following variable helps pair and alternate vector and vector load
16848 insns during scheduling. */
16849 static int vec_pairing;
16850
16851
16852 /* Power4 load update and store update instructions are cracked into a
16853 load or store and an integer insn which are executed in the same cycle.
16854 Branches have their own dispatch slot which does not count against the
16855 GCC issue rate, but it changes the program flow so there are no other
16856 instructions to issue in this cycle. */
16857
16858 static int
16859 rs6000_variable_issue_1 (rtx_insn *insn, int more)
16860 {
16861 last_scheduled_insn = insn;
16862 if (GET_CODE (PATTERN (insn)) == USE
16863 || GET_CODE (PATTERN (insn)) == CLOBBER)
16864 {
16865 cached_can_issue_more = more;
16866 return cached_can_issue_more;
16867 }
16868
16869 if (insn_terminates_group_p (insn, current_group))
16870 {
16871 cached_can_issue_more = 0;
16872 return cached_can_issue_more;
16873 }
16874
16875 /* If the insn is not recognized (has no reservation), it does not count against the issue rate. */
16876 if (recog_memoized (insn) < 0)
16877 return more;
16878
16879 if (rs6000_sched_groups)
16880 {
16881 if (is_microcoded_insn (insn))
16882 cached_can_issue_more = 0;
16883 else if (is_cracked_insn (insn))
16884 cached_can_issue_more = more > 2 ? more - 2 : 0;
16885 else
16886 cached_can_issue_more = more - 1;
16887
16888 return cached_can_issue_more;
16889 }
16890
16891 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
16892 return 0;
16893
16894 cached_can_issue_more = more - 1;
16895 return cached_can_issue_more;
16896 }
16897
16898 static int
16899 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
16900 {
16901 int r = rs6000_variable_issue_1 (insn, more);
16902 if (verbose)
16903 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
16904 return r;
16905 }
16906
16907 /* Adjust the cost of a scheduling dependency. Return the new cost of
16908 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
16909
16910 static int
16911 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
16912 unsigned int)
16913 {
16914 enum attr_type attr_type;
16915
16916 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
16917 return cost;
16918
16919 switch (dep_type)
16920 {
16921 case REG_DEP_TRUE:
16922 {
16923 /* Data dependency; DEP_INSN writes a register that INSN reads
16924 some cycles later. */
16925
16926 /* Separate a load from a narrower, dependent store. */
16927 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
16928 || rs6000_tune == PROCESSOR_FUTURE)
16929 && GET_CODE (PATTERN (insn)) == SET
16930 && GET_CODE (PATTERN (dep_insn)) == SET
16931 && MEM_P (XEXP (PATTERN (insn), 1))
16932 && MEM_P (XEXP (PATTERN (dep_insn), 0))
16933 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
16934 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
16935 return cost + 14;
16936
16937 attr_type = get_attr_type (insn);
16938
16939 switch (attr_type)
16940 {
16941 case TYPE_JMPREG:
16942 /* Tell the first scheduling pass about the latency between
16943 a mtctr and bctr (and mtlr and br/blr). The first
16944 scheduling pass will not know about this latency since
16945 the mtctr instruction, which has the latency associated
16946 to it, will be generated by reload. */
16947 return 4;
16948 case TYPE_BRANCH:
16949 /* Leave some extra cycles between a compare and its
16950 dependent branch, to inhibit expensive mispredicts. */
16951 if ((rs6000_tune == PROCESSOR_PPC603
16952 || rs6000_tune == PROCESSOR_PPC604
16953 || rs6000_tune == PROCESSOR_PPC604e
16954 || rs6000_tune == PROCESSOR_PPC620
16955 || rs6000_tune == PROCESSOR_PPC630
16956 || rs6000_tune == PROCESSOR_PPC750
16957 || rs6000_tune == PROCESSOR_PPC7400
16958 || rs6000_tune == PROCESSOR_PPC7450
16959 || rs6000_tune == PROCESSOR_PPCE5500
16960 || rs6000_tune == PROCESSOR_PPCE6500
16961 || rs6000_tune == PROCESSOR_POWER4
16962 || rs6000_tune == PROCESSOR_POWER5
16963 || rs6000_tune == PROCESSOR_POWER7
16964 || rs6000_tune == PROCESSOR_POWER8
16965 || rs6000_tune == PROCESSOR_POWER9
16966 || rs6000_tune == PROCESSOR_FUTURE
16967 || rs6000_tune == PROCESSOR_CELL)
16968 && recog_memoized (dep_insn)
16969 && (INSN_CODE (dep_insn) >= 0))
16970
16971 switch (get_attr_type (dep_insn))
16972 {
16973 case TYPE_CMP:
16974 case TYPE_FPCOMPARE:
16975 case TYPE_CR_LOGICAL:
16976 return cost + 2;
16977 case TYPE_EXTS:
16978 case TYPE_MUL:
16979 if (get_attr_dot (dep_insn) == DOT_YES)
16980 return cost + 2;
16981 else
16982 break;
16983 case TYPE_SHIFT:
16984 if (get_attr_dot (dep_insn) == DOT_YES
16985 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
16986 return cost + 2;
16987 else
16988 break;
16989 default:
16990 break;
16991 }
16992 break;
16993
16994 case TYPE_STORE:
16995 case TYPE_FPSTORE:
16996 if ((rs6000_tune == PROCESSOR_POWER6)
16997 && recog_memoized (dep_insn)
16998 && (INSN_CODE (dep_insn) >= 0))
16999 {
17000
17001 if (GET_CODE (PATTERN (insn)) != SET)
17002 /* If this happens, we have to extend this to schedule
17003 optimally. Return default for now. */
17004 return cost;
17005
17006 /* Adjust the cost for the case where the value written
17007 by a fixed point operation is used as the address
17008 gen value on a store. */
17009 switch (get_attr_type (dep_insn))
17010 {
17011 case TYPE_LOAD:
17012 case TYPE_CNTLZ:
17013 {
17014 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17015 return get_attr_sign_extend (dep_insn)
17016 == SIGN_EXTEND_YES ? 6 : 4;
17017 break;
17018 }
17019 case TYPE_SHIFT:
17020 {
17021 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17022 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17023 6 : 3;
17024 break;
17025 }
17026 case TYPE_INTEGER:
17027 case TYPE_ADD:
17028 case TYPE_LOGICAL:
17029 case TYPE_EXTS:
17030 case TYPE_INSERT:
17031 {
17032 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17033 return 3;
17034 break;
17035 }
17036 case TYPE_STORE:
17037 case TYPE_FPLOAD:
17038 case TYPE_FPSTORE:
17039 {
17040 if (get_attr_update (dep_insn) == UPDATE_YES
17041 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17042 return 3;
17043 break;
17044 }
17045 case TYPE_MUL:
17046 {
17047 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17048 return 17;
17049 break;
17050 }
17051 case TYPE_DIV:
17052 {
17053 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17054 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17055 break;
17056 }
17057 default:
17058 break;
17059 }
17060 }
17061 break;
17062
17063 case TYPE_LOAD:
17064 if ((rs6000_tune == PROCESSOR_POWER6)
17065 && recog_memoized (dep_insn)
17066 && (INSN_CODE (dep_insn) >= 0))
17067 {
17068
17069 /* Adjust the cost for the case where the value written
17070 by a fixed point instruction is used within the address
17071 gen portion of a subsequent load(u)(x) */
17072 switch (get_attr_type (dep_insn))
17073 {
17074 case TYPE_LOAD:
17075 case TYPE_CNTLZ:
17076 {
17077 if (set_to_load_agen (dep_insn, insn))
17078 return get_attr_sign_extend (dep_insn)
17079 == SIGN_EXTEND_YES ? 6 : 4;
17080 break;
17081 }
17082 case TYPE_SHIFT:
17083 {
17084 if (set_to_load_agen (dep_insn, insn))
17085 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17086 6 : 3;
17087 break;
17088 }
17089 case TYPE_INTEGER:
17090 case TYPE_ADD:
17091 case TYPE_LOGICAL:
17092 case TYPE_EXTS:
17093 case TYPE_INSERT:
17094 {
17095 if (set_to_load_agen (dep_insn, insn))
17096 return 3;
17097 break;
17098 }
17099 case TYPE_STORE:
17100 case TYPE_FPLOAD:
17101 case TYPE_FPSTORE:
17102 {
17103 if (get_attr_update (dep_insn) == UPDATE_YES
17104 && set_to_load_agen (dep_insn, insn))
17105 return 3;
17106 break;
17107 }
17108 case TYPE_MUL:
17109 {
17110 if (set_to_load_agen (dep_insn, insn))
17111 return 17;
17112 break;
17113 }
17114 case TYPE_DIV:
17115 {
17116 if (set_to_load_agen (dep_insn, insn))
17117 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17118 break;
17119 }
17120 default:
17121 break;
17122 }
17123 }
17124 break;
17125
17126 case TYPE_FPLOAD:
17127 if ((rs6000_tune == PROCESSOR_POWER6)
17128 && get_attr_update (insn) == UPDATE_NO
17129 && recog_memoized (dep_insn)
17130 && (INSN_CODE (dep_insn) >= 0)
17131 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
17132 return 2;
17133
17134 default:
17135 break;
17136 }
17137
17138 /* Fall out to return default cost. */
17139 }
17140 break;
17141
17142 case REG_DEP_OUTPUT:
17143 /* Output dependency; DEP_INSN writes a register that INSN writes some
17144 cycles later. */
17145 if ((rs6000_tune == PROCESSOR_POWER6)
17146 && recog_memoized (dep_insn)
17147 && (INSN_CODE (dep_insn) >= 0))
17148 {
17149 attr_type = get_attr_type (insn);
17150
17151 switch (attr_type)
17152 {
17153 case TYPE_FP:
17154 case TYPE_FPSIMPLE:
17155 if (get_attr_type (dep_insn) == TYPE_FP
17156 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17157 return 1;
17158 break;
17159 case TYPE_FPLOAD:
17160 if (get_attr_update (insn) == UPDATE_NO
17161 && get_attr_type (dep_insn) == TYPE_MFFGPR)
17162 return 2;
17163 break;
17164 default:
17165 break;
17166 }
17167 }
17168 /* Fall through, no cost for output dependency. */
17169 /* FALLTHRU */
17170
17171 case REG_DEP_ANTI:
17172 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17173 cycles later. */
17174 return 0;
17175
17176 default:
17177 gcc_unreachable ();
17178 }
17179
17180 return cost;
17181 }
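
/* Worked example of the Power6 address-generation adjustment above:
   in a sequence such as

       addi 9,9,8      # TYPE_ADD, sets r9
       lwz 0,0(9)      # TYPE_LOAD, uses r9 to form the address

   the true dependence gets cost 3 rather than the default, because
   set_to_load_agen sees the add feeding the load's address
   computation.  Registers and offsets are invented for illustration. */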
17182
17183 /* Debug version of rs6000_adjust_cost. */
17184
17185 static int
17186 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17187 int cost, unsigned int dw)
17188 {
17189 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17190
17191 if (ret != cost)
17192 {
17193 const char *dep;
17194
17195 switch (dep_type)
17196 {
17197 default: dep = "unknown depencency"; break;
17198 case REG_DEP_TRUE: dep = "data dependency"; break;
17199 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17200 case REG_DEP_ANTI: dep = "anti depencency"; break;
17201 }
17202
17203 fprintf (stderr,
17204 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17205 "%s, insn:\n", ret, cost, dep);
17206
17207 debug_rtx (insn);
17208 }
17209
17210 return ret;
17211 }
17212
17213 /* Return true if INSN is microcoded by the processor,
17214 false otherwise. */
17215
17216 static bool
17217 is_microcoded_insn (rtx_insn *insn)
17218 {
17219 if (!insn || !NONDEBUG_INSN_P (insn)
17220 || GET_CODE (PATTERN (insn)) == USE
17221 || GET_CODE (PATTERN (insn)) == CLOBBER)
17222 return false;
17223
17224 if (rs6000_tune == PROCESSOR_CELL)
17225 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17226
17227 if (rs6000_sched_groups
17228 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17229 {
17230 enum attr_type type = get_attr_type (insn);
17231 if ((type == TYPE_LOAD
17232 && get_attr_update (insn) == UPDATE_YES
17233 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17234 || ((type == TYPE_LOAD || type == TYPE_STORE)
17235 && get_attr_update (insn) == UPDATE_YES
17236 && get_attr_indexed (insn) == INDEXED_YES)
17237 || type == TYPE_MFCR)
17238 return true;
17239 }
17240
17241 return false;
17242 }
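
/* On Power4/Power5, for instance, a sign-extending load with update
   such as "lhau 3,2(9)", or an update-indexed form such as
   "lwzux 3,9,10", is considered microcoded here and therefore ends
   the current dispatch group (see rs6000_variable_issue_1).  Operands
   are illustrative.  */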
17243
17244 /* The function returns true if INSN is cracked into 2 instructions
17245 by the processor (and therefore occupies 2 issue slots). */
17246
17247 static bool
17248 is_cracked_insn (rtx_insn *insn)
17249 {
17250 if (!insn || !NONDEBUG_INSN_P (insn)
17251 || GET_CODE (PATTERN (insn)) == USE
17252 || GET_CODE (PATTERN (insn)) == CLOBBER)
17253 return false;
17254
17255 if (rs6000_sched_groups
17256 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17257 {
17258 enum attr_type type = get_attr_type (insn);
17259 if ((type == TYPE_LOAD
17260 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17261 && get_attr_update (insn) == UPDATE_NO)
17262 || (type == TYPE_LOAD
17263 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17264 && get_attr_update (insn) == UPDATE_YES
17265 && get_attr_indexed (insn) == INDEXED_NO)
17266 || (type == TYPE_STORE
17267 && get_attr_update (insn) == UPDATE_YES
17268 && get_attr_indexed (insn) == INDEXED_NO)
17269 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17270 && get_attr_update (insn) == UPDATE_YES)
17271 || (type == TYPE_CR_LOGICAL
17272 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17273 || (type == TYPE_EXTS
17274 && get_attr_dot (insn) == DOT_YES)
17275 || (type == TYPE_SHIFT
17276 && get_attr_dot (insn) == DOT_YES
17277 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
17278 || (type == TYPE_MUL
17279 && get_attr_dot (insn) == DOT_YES)
17280 || type == TYPE_DIV
17281 || (type == TYPE_INSERT
17282 && get_attr_size (insn) == SIZE_32))
17283 return true;
17284 }
17285
17286 return false;
17287 }
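
/* Example: on Power4/Power5 a non-indexed store with update such as
   "stwu 1,-16(1)" is cracked into a store plus a separate GPR update
   and so consumes two issue slots, which is why
   rs6000_variable_issue_1 charges such insns "more - 2".  The
   operands are illustrative.  */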
17288
17289 /* The function returns true if INSN can be issued only from
17290 the branch slot. */
17291
17292 static bool
17293 is_branch_slot_insn (rtx_insn *insn)
17294 {
17295 if (!insn || !NONDEBUG_INSN_P (insn)
17296 || GET_CODE (PATTERN (insn)) == USE
17297 || GET_CODE (PATTERN (insn)) == CLOBBER)
17298 return false;
17299
17300 if (rs6000_sched_groups)
17301 {
17302 enum attr_type type = get_attr_type (insn);
17303 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17304 return true;
17305 return false;
17306 }
17307
17308 return false;
17309 }
17310
17311 /* Return true if OUT_INSN sets a value that is used in the
17312 address generation computation of IN_INSN. */
17313 static bool
17314 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17315 {
17316 rtx out_set, in_set;
17317
17318 /* For performance reasons, only handle the simple case where
17319 both loads are a single_set. */
17320 out_set = single_set (out_insn);
17321 if (out_set)
17322 {
17323 in_set = single_set (in_insn);
17324 if (in_set)
17325 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17326 }
17327
17328 return false;
17329 }
17330
17331 /* Try to determine base/offset/size parts of the given MEM.
17332 Return true if successful, false if any of the values could
17333 not be determined.
17334
17335 This function only looks for REG or REG+CONST address forms.
17336 REG+REG address form will return false. */
17337
17338 static bool
17339 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
17340 HOST_WIDE_INT *size)
17341 {
17342 rtx addr_rtx;
17343 if (MEM_SIZE_KNOWN_P (mem))
17344 *size = MEM_SIZE (mem);
17345 else
17346 return false;
17347
17348 addr_rtx = (XEXP (mem, 0));
17349 if (GET_CODE (addr_rtx) == PRE_MODIFY)
17350 addr_rtx = XEXP (addr_rtx, 1);
17351
17352 *offset = 0;
17353 while (GET_CODE (addr_rtx) == PLUS
17354 && CONST_INT_P (XEXP (addr_rtx, 1)))
17355 {
17356 *offset += INTVAL (XEXP (addr_rtx, 1));
17357 addr_rtx = XEXP (addr_rtx, 0);
17358 }
17359 if (!REG_P (addr_rtx))
17360 return false;
17361
17362 *base = addr_rtx;
17363 return true;
17364 }
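
/* For instance, a reference of the form (mem:SI (plus:SI (reg 9)
   (const_int 16))) with a known 4-byte size yields *BASE = r9,
   *OFFSET = 16 and *SIZE = 4, while an indexed REG+REG address makes
   this function return false.  The RTL shown is illustrative.  */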
17365
17366 /* Return true if the target storage location of MEM1 is adjacent
17367 to the target storage location of MEM2. */
17369
17370 static bool
17371 adjacent_mem_locations (rtx mem1, rtx mem2)
17372 {
17373 rtx reg1, reg2;
17374 HOST_WIDE_INT off1, size1, off2, size2;
17375
17376 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17377 && get_memref_parts (mem2, &reg2, &off2, &size2))
17378 return ((REGNO (reg1) == REGNO (reg2))
17379 && ((off1 + size1 == off2)
17380 || (off2 + size2 == off1)));
17381
17382 return false;
17383 }
17384
17385 /* This function returns true if it can be determined that the two MEM
17386 locations overlap by at least 1 byte based on base reg/offset/size. */
17387
17388 static bool
17389 mem_locations_overlap (rtx mem1, rtx mem2)
17390 {
17391 rtx reg1, reg2;
17392 HOST_WIDE_INT off1, size1, off2, size2;
17393
17394 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17395 && get_memref_parts (mem2, &reg2, &off2, &size2))
17396 return ((REGNO (reg1) == REGNO (reg2))
17397 && (((off1 <= off2) && (off1 + size1 > off2))
17398 || ((off2 <= off1) && (off2 + size2 > off1))));
17399
17400 return false;
17401 }
17402
17403 /* A C statement (sans semicolon) to update the integer scheduling
17404 priority INSN_PRIORITY (INSN). Increase the priority to execute
17405 INSN earlier; reduce the priority to execute INSN later. Do not
17406 define this macro if you do not need to adjust the scheduling
17407 priorities of insns. */
17408
17409 static int
17410 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
17411 {
17412 rtx load_mem, str_mem;
17413 /* On machines (like the 750) which have asymmetric integer units,
17414 where one integer unit can do multiply and divides and the other
17415 can't, reduce the priority of multiply/divide so it is scheduled
17416 before other integer operations. */
17417
17418 #if 0
17419 if (! INSN_P (insn))
17420 return priority;
17421
17422 if (GET_CODE (PATTERN (insn)) == USE)
17423 return priority;
17424
17425 switch (rs6000_tune) {
17426 case PROCESSOR_PPC750:
17427 switch (get_attr_type (insn))
17428 {
17429 default:
17430 break;
17431
17432 case TYPE_MUL:
17433 case TYPE_DIV:
17434 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
17435 priority, priority);
17436 if (priority >= 0 && priority < 0x01000000)
17437 priority >>= 3;
17438 break;
17439 }
17440 }
17441 #endif
17442
17443 if (insn_must_be_first_in_group (insn)
17444 && reload_completed
17445 && current_sched_info->sched_max_insns_priority
17446 && rs6000_sched_restricted_insns_priority)
17447 {
17448
17449 /* Prioritize insns that can be dispatched only in the first
17450 dispatch slot. */
17451 if (rs6000_sched_restricted_insns_priority == 1)
17452 /* Attach highest priority to insn. This means that in
17453 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
17454 precede 'priority' (critical path) considerations. */
17455 return current_sched_info->sched_max_insns_priority;
17456 else if (rs6000_sched_restricted_insns_priority == 2)
17457 /* Increase priority of insn by a minimal amount. This means that in
17458 haifa-sched.c:ready_sort(), only 'priority' (critical path)
17459 considerations precede dispatch-slot restriction considerations. */
17460 return (priority + 1);
17461 }
17462
17463 if (rs6000_tune == PROCESSOR_POWER6
17464 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
17465 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
17466 /* Attach highest priority to insn if the scheduler has just issued two
17467 stores and this instruction is a load, or two loads and this instruction
17468 is a store. Power6 wants loads and stores scheduled alternately
17469 when possible */
17470 return current_sched_info->sched_max_insns_priority;
17471
17472 return priority;
17473 }
17474
17475 /* Return true if the instruction is nonpipelined on the Cell. */
17476 static bool
17477 is_nonpipeline_insn (rtx_insn *insn)
17478 {
17479 enum attr_type type;
17480 if (!insn || !NONDEBUG_INSN_P (insn)
17481 || GET_CODE (PATTERN (insn)) == USE
17482 || GET_CODE (PATTERN (insn)) == CLOBBER)
17483 return false;
17484
17485 type = get_attr_type (insn);
17486 if (type == TYPE_MUL
17487 || type == TYPE_DIV
17488 || type == TYPE_SDIV
17489 || type == TYPE_DDIV
17490 || type == TYPE_SSQRT
17491 || type == TYPE_DSQRT
17492 || type == TYPE_MFCR
17493 || type == TYPE_MFCRF
17494 || type == TYPE_MFJMPR)
17495 {
17496 return true;
17497 }
17498 return false;
17499 }
17500
17501
17502 /* Return how many instructions the machine can issue per cycle. */
17503
17504 static int
17505 rs6000_issue_rate (void)
17506 {
17507 /* Unless scheduling for register pressure, use issue rate of 1 for
17508 first scheduling pass to decrease degradation. */
17509 if (!reload_completed && !flag_sched_pressure)
17510 return 1;
17511
17512 switch (rs6000_tune) {
17513 case PROCESSOR_RS64A:
17514 case PROCESSOR_PPC601: /* ? */
17515 case PROCESSOR_PPC7450:
17516 return 3;
17517 case PROCESSOR_PPC440:
17518 case PROCESSOR_PPC603:
17519 case PROCESSOR_PPC750:
17520 case PROCESSOR_PPC7400:
17521 case PROCESSOR_PPC8540:
17522 case PROCESSOR_PPC8548:
17523 case PROCESSOR_CELL:
17524 case PROCESSOR_PPCE300C2:
17525 case PROCESSOR_PPCE300C3:
17526 case PROCESSOR_PPCE500MC:
17527 case PROCESSOR_PPCE500MC64:
17528 case PROCESSOR_PPCE5500:
17529 case PROCESSOR_PPCE6500:
17530 case PROCESSOR_TITAN:
17531 return 2;
17532 case PROCESSOR_PPC476:
17533 case PROCESSOR_PPC604:
17534 case PROCESSOR_PPC604e:
17535 case PROCESSOR_PPC620:
17536 case PROCESSOR_PPC630:
17537 return 4;
17538 case PROCESSOR_POWER4:
17539 case PROCESSOR_POWER5:
17540 case PROCESSOR_POWER6:
17541 case PROCESSOR_POWER7:
17542 return 5;
17543 case PROCESSOR_POWER8:
17544 return 7;
17545 case PROCESSOR_POWER9:
17546 case PROCESSOR_FUTURE:
17547 return 6;
17548 default:
17549 return 1;
17550 }
17551 }
17552
17553 /* Return how many instructions to look ahead for better insn
17554 scheduling. */
17555
17556 static int
17557 rs6000_use_sched_lookahead (void)
17558 {
17559 switch (rs6000_tune)
17560 {
17561 case PROCESSOR_PPC8540:
17562 case PROCESSOR_PPC8548:
17563 return 4;
17564
17565 case PROCESSOR_CELL:
17566 return (reload_completed ? 8 : 0);
17567
17568 default:
17569 return 0;
17570 }
17571 }
17572
17573 /* We are choosing insn from the ready queue. Return zero if INSN can be
17574 chosen. */
17575 static int
17576 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
17577 {
17578 if (ready_index == 0)
17579 return 0;
17580
17581 if (rs6000_tune != PROCESSOR_CELL)
17582 return 0;
17583
17584 gcc_assert (insn != NULL_RTX && INSN_P (insn));
17585
17586 if (!reload_completed
17587 || is_nonpipeline_insn (insn)
17588 || is_microcoded_insn (insn))
17589 return 1;
17590
17591 return 0;
17592 }
17593
17594 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
17595 and return true. */
17596
17597 static bool
17598 find_mem_ref (rtx pat, rtx *mem_ref)
17599 {
17600 const char * fmt;
17601 int i, j;
17602
17603 /* stack_tie does not produce any real memory traffic. */
17604 if (tie_operand (pat, VOIDmode))
17605 return false;
17606
17607 if (MEM_P (pat))
17608 {
17609 *mem_ref = pat;
17610 return true;
17611 }
17612
17613 /* Recursively process the pattern. */
17614 fmt = GET_RTX_FORMAT (GET_CODE (pat));
17615
17616 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
17617 {
17618 if (fmt[i] == 'e')
17619 {
17620 if (find_mem_ref (XEXP (pat, i), mem_ref))
17621 return true;
17622 }
17623 else if (fmt[i] == 'E')
17624 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
17625 {
17626 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
17627 return true;
17628 }
17629 }
17630
17631 return false;
17632 }
17633
17634 /* Determine if PAT is a PATTERN of a load insn. */
17635
17636 static bool
17637 is_load_insn1 (rtx pat, rtx *load_mem)
17638 {
17639 if (!pat)
17640 return false;
17641
17642 if (GET_CODE (pat) == SET)
17643 return find_mem_ref (SET_SRC (pat), load_mem);
17644
17645 if (GET_CODE (pat) == PARALLEL)
17646 {
17647 int i;
17648
17649 for (i = 0; i < XVECLEN (pat, 0); i++)
17650 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
17651 return true;
17652 }
17653
17654 return false;
17655 }
17656
17657 /* Determine if INSN loads from memory. */
17658
17659 static bool
17660 is_load_insn (rtx insn, rtx *load_mem)
17661 {
17662 if (!insn || !INSN_P (insn))
17663 return false;
17664
17665 if (CALL_P (insn))
17666 return false;
17667
17668 return is_load_insn1 (PATTERN (insn), load_mem);
17669 }
17670
17671 /* Determine if PAT is a PATTERN of a store insn. */
17672
17673 static bool
17674 is_store_insn1 (rtx pat, rtx *str_mem)
17675 {
17676 if (!pat)
17677 return false;
17678
17679 if (GET_CODE (pat) == SET)
17680 return find_mem_ref (SET_DEST (pat), str_mem);
17681
17682 if (GET_CODE (pat) == PARALLEL)
17683 {
17684 int i;
17685
17686 for (i = 0; i < XVECLEN (pat, 0); i++)
17687 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
17688 return true;
17689 }
17690
17691 return false;
17692 }
17693
17694 /* Determine if INSN stores to memory. */
17695
17696 static bool
17697 is_store_insn (rtx insn, rtx *str_mem)
17698 {
17699 if (!insn || !INSN_P (insn))
17700 return false;
17701
17702 return is_store_insn1 (PATTERN (insn), str_mem);
17703 }
17704
17705 /* Return whether TYPE is a Power9 pairable vector instruction type. */
17706
17707 static bool
17708 is_power9_pairable_vec_type (enum attr_type type)
17709 {
17710 switch (type)
17711 {
17712 case TYPE_VECSIMPLE:
17713 case TYPE_VECCOMPLEX:
17714 case TYPE_VECDIV:
17715 case TYPE_VECCMP:
17716 case TYPE_VECPERM:
17717 case TYPE_VECFLOAT:
17718 case TYPE_VECFDIV:
17719 case TYPE_VECDOUBLE:
17720 return true;
17721 default:
17722 break;
17723 }
17724 return false;
17725 }
17726
17727 /* Returns whether the dependence between INSN and NEXT is considered
17728 costly by the given target. */
17729
17730 static bool
17731 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
17732 {
17733 rtx insn;
17734 rtx next;
17735 rtx load_mem, str_mem;
17736
17737 /* If the flag is not enabled - no dependence is considered costly;
17738 allow all dependent insns in the same group.
17739 This is the most aggressive option. */
17740 if (rs6000_sched_costly_dep == no_dep_costly)
17741 return false;
17742
17743 /* If the flag is set to 1 - a dependence is always considered costly;
17744 do not allow dependent instructions in the same group.
17745 This is the most conservative option. */
17746 if (rs6000_sched_costly_dep == all_deps_costly)
17747 return true;
17748
17749 insn = DEP_PRO (dep);
17750 next = DEP_CON (dep);
17751
17752 if (rs6000_sched_costly_dep == store_to_load_dep_costly
17753 && is_load_insn (next, &load_mem)
17754 && is_store_insn (insn, &str_mem))
17755 /* Prevent load after store in the same group. */
17756 return true;
17757
17758 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
17759 && is_load_insn (next, &load_mem)
17760 && is_store_insn (insn, &str_mem)
17761 && DEP_TYPE (dep) == REG_DEP_TRUE
17762 && mem_locations_overlap(str_mem, load_mem))
17763 /* Prevent load after store in the same group if it is a true
17764 dependence. */
17765 return true;
17766
17767 /* The flag is set to X; dependences with latency >= X are considered costly,
17768 and will not be scheduled in the same group. */
17769 if (rs6000_sched_costly_dep <= max_dep_latency
17770 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
17771 return true;
17772
17773 return false;
17774 }
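
/* For example, under -msched-costly-dep=store_to_load any store
   followed by a dependent load is reported costly here, so the
   scheduler will not place the load in the same dispatch group as the
   store; under true_store_to_load it must in addition be a true
   dependence on memory locations that overlap by at least one byte. */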
17775
17776 /* Return the next insn after INSN that is found before TAIL is reached,
17777 skipping any "non-active" insns - insns that will not actually occupy
17778 an issue slot. Return NULL_RTX if such an insn is not found. */
17779
17780 static rtx_insn *
17781 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
17782 {
17783 if (insn == NULL_RTX || insn == tail)
17784 return NULL;
17785
17786 while (1)
17787 {
17788 insn = NEXT_INSN (insn);
17789 if (insn == NULL_RTX || insn == tail)
17790 return NULL;
17791
17792 if (CALL_P (insn)
17793 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
17794 || (NONJUMP_INSN_P (insn)
17795 && GET_CODE (PATTERN (insn)) != USE
17796 && GET_CODE (PATTERN (insn)) != CLOBBER
17797 && INSN_CODE (insn) != CODE_FOR_stack_tie))
17798 break;
17799 }
17800 return insn;
17801 }
17802
17803 /* Move instruction at POS to the end of the READY list. */
17804
17805 static void
17806 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
17807 {
17808 rtx_insn *tmp;
17809 int i;
17810
17811 tmp = ready[pos];
17812 for (i = pos; i < lastpos; i++)
17813 ready[i] = ready[i + 1];
17814 ready[lastpos] = tmp;
17815 }
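
/* Note that haifa-sched issues from the end of the ready array, so
   READY[LASTPOS] is effectively the head of the queue; this is why
   the callers below describe this operation as moving an insn "to the
   head of the ready list".  */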
17816
17817 /* Do Power6 specific sched_reorder2 reordering of ready list. */
17818
17819 static int
17820 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
17821 {
17822 /* For Power6, we need to handle some special cases to try and keep the
17823 store queue from overflowing and triggering expensive flushes.
17824
17825 This code monitors how load and store instructions are being issued
17826 and skews the ready list one way or the other to increase the likelihood
17827 that a desired instruction is issued at the proper time.
17828
17829 A couple of things are done. First, we maintain a "load_store_pendulum"
17830 to track the current state of load/store issue.
17831
17832 - If the pendulum is at zero, then no loads or stores have been
17833 issued in the current cycle so we do nothing.
17834
17835 - If the pendulum is 1, then a single load has been issued in this
17836 cycle and we attempt to locate another load in the ready list to
17837 issue with it.
17838
17839 - If the pendulum is -2, then two stores have already been
17840 issued in this cycle, so we increase the priority of the first load
17841 in the ready list to increase it's likelihood of being chosen first
17842 in the next cycle.
17843
17844 - If the pendulum is -1, then a single store has been issued in this
17845 cycle and we attempt to locate another store in the ready list to
17846 issue with it, preferring a store to an adjacent memory location to
17847 facilitate store pairing in the store queue.
17848
17849 - If the pendulum is 2, then two loads have already been
17850 issued in this cycle, so we increase the priority of the first store
17851 in the ready list to increase its likelihood of being chosen first
17852 in the next cycle.
17853
17854 - If the pendulum < -2 or > 2, then do nothing.
17855
17856 Note: This code covers the most common scenarios. There exist
17857 non-load/store instructions which make use of the LSU and which
17858 would need to be accounted for to strictly model the behavior
17859 of the machine. Those instructions are currently unaccounted
17860 for to help minimize compile time overhead of this code.
17861 */
17862 int pos;
17863 rtx load_mem, str_mem;
17864
17865 if (is_store_insn (last_scheduled_insn, &str_mem))
17866 /* Issuing a store, swing the load_store_pendulum to the left */
17867 load_store_pendulum--;
17868 else if (is_load_insn (last_scheduled_insn, &load_mem))
17869 /* Issuing a load, swing the load_store_pendulum to the right */
17870 load_store_pendulum++;
17871 else
17872 return cached_can_issue_more;
17873
17874 /* If the pendulum is balanced, or there is only one instruction on
17875 the ready list, then all is well, so return. */
17876 if ((load_store_pendulum == 0) || (lastpos <= 0))
17877 return cached_can_issue_more;
17878
17879 if (load_store_pendulum == 1)
17880 {
17881 /* A load has been issued in this cycle. Scan the ready list
17882 for another load to issue with it. */
17883 pos = lastpos;
17884
17885 while (pos >= 0)
17886 {
17887 if (is_load_insn (ready[pos], &load_mem))
17888 {
17889 /* Found a load. Move it to the head of the ready list,
17890 and adjust its priority so that it is more likely to
17891 stay there. */
17892 move_to_end_of_ready (ready, pos, lastpos);
17893
17894 if (!sel_sched_p ()
17895 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17896 INSN_PRIORITY (ready[lastpos])++;
17897 break;
17898 }
17899 pos--;
17900 }
17901 }
17902 else if (load_store_pendulum == -2)
17903 {
17904 /* Two stores have been issued in this cycle. Increase the
17905 priority of the first load in the ready list to favor it for
17906 issuing in the next cycle. */
17907 pos = lastpos;
17908
17909 while (pos >= 0)
17910 {
17911 if (is_load_insn (ready[pos], &load_mem)
17912 && !sel_sched_p ()
17913 && INSN_PRIORITY_KNOWN (ready[pos]))
17914 {
17915 INSN_PRIORITY (ready[pos])++;
17916
17917 /* Adjust the pendulum to account for the fact that a load
17918 was found and increased in priority. This is to prevent
17919 increasing the priority of multiple loads. */
17920 load_store_pendulum--;
17921
17922 break;
17923 }
17924 pos--;
17925 }
17926 }
17927 else if (load_store_pendulum == -1)
17928 {
17929 /* A store has been issued in this cycle. Scan the ready list for
17930 another store to issue with it, preferring a store to an adjacent
17931 memory location. */
17932 int first_store_pos = -1;
17933
17934 pos = lastpos;
17935
17936 while (pos >= 0)
17937 {
17938 if (is_store_insn (ready[pos], &str_mem))
17939 {
17940 rtx str_mem2;
17941 /* Maintain the index of the first store found on the
17942 list */
17943 if (first_store_pos == -1)
17944 first_store_pos = pos;
17945
17946 if (is_store_insn (last_scheduled_insn, &str_mem2)
17947 && adjacent_mem_locations (str_mem, str_mem2))
17948 {
17949 /* Found an adjacent store. Move it to the head of the
17950 ready list, and adjust its priority so that it is
17951 more likely to stay there. */
17952 move_to_end_of_ready (ready, pos, lastpos);
17953
17954 if (!sel_sched_p ()
17955 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17956 INSN_PRIORITY (ready[lastpos])++;
17957
17958 first_store_pos = -1;
17959
17960 break;
17961 }
17962 }
17963 pos--;
17964 }
17965
17966 if (first_store_pos >= 0)
17967 {
17968 /* An adjacent store wasn't found, but a non-adjacent store was,
17969 so move the non-adjacent store to the front of the ready
17970 list, and adjust its priority so that it is more likely to
17971 stay there. */
17972 move_to_end_of_ready (ready, first_store_pos, lastpos);
17973 if (!sel_sched_p ()
17974 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17975 INSN_PRIORITY (ready[lastpos])++;
17976 }
17977 }
17978 else if (load_store_pendulum == 2)
17979 {
17980 /* Two loads have been issued in this cycle. Increase the priority
17981 of the first store in the ready list to favor it for issuing in
17982 the next cycle. */
17983 pos = lastpos;
17984
17985 while (pos >= 0)
17986 {
17987 if (is_store_insn (ready[pos], &str_mem)
17988 && !sel_sched_p ()
17989 && INSN_PRIORITY_KNOWN (ready[pos]))
17990 {
17991 INSN_PRIORITY (ready[pos])++;
17992
17993 /* Adjust the pendulum to account for the fact that a store
17994 was found and increased in priority. This is to prevent
17995 increasing the priority of multiple stores. */
17996 load_store_pendulum++;
17997
17998 break;
17999 }
18000 pos--;
18001 }
18002 }
18003
18004 return cached_can_issue_more;
18005 }
18006
18007 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18008
18009 static int
18010 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18011 {
18012 int pos;
18013 enum attr_type type, type2;
18014
18015 type = get_attr_type (last_scheduled_insn);
18016
18017 /* Try to issue fixed point divides back-to-back in pairs so they will be
18018 routed to separate execution units and execute in parallel. */
18019 if (type == TYPE_DIV && divide_cnt == 0)
18020 {
18021 /* First divide has been scheduled. */
18022 divide_cnt = 1;
18023
18024 /* Scan the ready list looking for another divide, if found move it
18025 to the end of the list so it is chosen next. */
18026 pos = lastpos;
18027 while (pos >= 0)
18028 {
18029 if (recog_memoized (ready[pos]) >= 0
18030 && get_attr_type (ready[pos]) == TYPE_DIV)
18031 {
18032 move_to_end_of_ready (ready, pos, lastpos);
18033 break;
18034 }
18035 pos--;
18036 }
18037 }
18038 else
18039 {
18040 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18041 divide_cnt = 0;
18042
18043 /* The best dispatch throughput for vector and vector load insns can be
18044 achieved by interleaving a vector and vector load such that they'll
18045 dispatch to the same superslice. If this pairing cannot be achieved
18046 then it is best to pair vector insns together and vector load insns
18047 together.
18048
18049 To aid in this pairing, vec_pairing maintains the current state with
18050 the following values:
18051
18052 0 : Initial state, no vecload/vector pairing has been started.
18053
18054 1 : A vecload or vector insn has been issued and a candidate for
18055 pairing has been found and moved to the end of the ready
18056 list. */
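/* An illustrative sequence (assumed, not from the original
   sources): a vecload issues with vec_pairing == 0; a pairable
   vector insn found below is moved to the end of the ready list
   so it dispatches next and vec_pairing becomes 1; whatever
   issues after that resets the state to 0 further down.  */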
18057 if (type == TYPE_VECLOAD)
18058 {
18059 /* Issued a vecload. */
18060 if (vec_pairing == 0)
18061 {
18062 int vecload_pos = -1;
18063 /* We issued a single vecload, look for a vector insn to pair it
18064 with. If one isn't found, try to pair another vecload. */
18065 pos = lastpos;
18066 while (pos >= 0)
18067 {
18068 if (recog_memoized (ready[pos]) >= 0)
18069 {
18070 type2 = get_attr_type (ready[pos]);
18071 if (is_power9_pairable_vec_type (type2))
18072 {
18073 /* Found a vector insn to pair with, move it to the
18074 end of the ready list so it is scheduled next. */
18075 move_to_end_of_ready (ready, pos, lastpos);
18076 vec_pairing = 1;
18077 return cached_can_issue_more;
18078 }
18079 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18080 /* Remember position of first vecload seen. */
18081 vecload_pos = pos;
18082 }
18083 pos--;
18084 }
18085 if (vecload_pos >= 0)
18086 {
18087 /* Didn't find a vector to pair with but did find a vecload,
18088 move it to the end of the ready list. */
18089 move_to_end_of_ready (ready, vecload_pos, lastpos);
18090 vec_pairing = 1;
18091 return cached_can_issue_more;
18092 }
18093 }
18094 }
18095 else if (is_power9_pairable_vec_type (type))
18096 {
18097 /* Issued a vector operation. */
18098 if (vec_pairing == 0)
18099 {
18100 int vec_pos = -1;
18101 /* We issued a single vector insn, look for a vecload to pair it
18102 with. If one isn't found, try to pair another vector. */
18103 pos = lastpos;
18104 while (pos >= 0)
18105 {
18106 if (recog_memoized (ready[pos]) >= 0)
18107 {
18108 type2 = get_attr_type (ready[pos]);
18109 if (type2 == TYPE_VECLOAD)
18110 {
18111 /* Found a vecload insn to pair with, move it to the
18112 end of the ready list so it is scheduled next. */
18113 move_to_end_of_ready (ready, pos, lastpos);
18114 vec_pairing = 1;
18115 return cached_can_issue_more;
18116 }
18117 else if (is_power9_pairable_vec_type (type2)
18118 && vec_pos == -1)
18119 /* Remember position of first vector insn seen. */
18120 vec_pos = pos;
18121 }
18122 pos--;
18123 }
18124 if (vec_pos >= 0)
18125 {
18126 /* Didn't find a vecload to pair with but did find a vector
18127 insn, move it to the end of the ready list. */
18128 move_to_end_of_ready (ready, vec_pos, lastpos);
18129 vec_pairing = 1;
18130 return cached_can_issue_more;
18131 }
18132 }
18133 }
18134
18135 /* We've either finished a vec/vecload pair, couldn't find an insn to
18136 continue the current pair, or the last insn had nothing to do
18137 with pairing. In any case, reset the state. */
18138 vec_pairing = 0;
18139 }
18140
18141 return cached_can_issue_more;
18142 }
18143
18144 /* We are about to begin issuing insns for this clock cycle. */
18145
18146 static int
18147 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18148 rtx_insn **ready ATTRIBUTE_UNUSED,
18149 int *pn_ready ATTRIBUTE_UNUSED,
18150 int clock_var ATTRIBUTE_UNUSED)
18151 {
18152 int n_ready = *pn_ready;
18153
18154 if (sched_verbose)
18155 fprintf (dump, "// rs6000_sched_reorder :\n");
18156
18157 /* Reorder the ready list, if the second-to-last ready insn
18158 is a nonpipelined insn. */
18159 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18160 {
18161 if (is_nonpipeline_insn (ready[n_ready - 1])
18162 && (recog_memoized (ready[n_ready - 2]) > 0))
18163 /* Simply swap first two insns. */
18164 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18165 }
18166
18167 if (rs6000_tune == PROCESSOR_POWER6)
18168 load_store_pendulum = 0;
18169
18170 return rs6000_issue_rate ();
18171 }
18172
18173 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18174
18175 static int
18176 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18177 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18178 {
18179 if (sched_verbose)
18180 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18181
18182 /* Do Power6 dependent reordering if necessary. */
18183 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18184 return power6_sched_reorder2 (ready, *pn_ready - 1);
18185
18186 /* Do Power9 dependent reordering if necessary. */
18187 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18188 && recog_memoized (last_scheduled_insn) >= 0)
18189 return power9_sched_reorder2 (ready, *pn_ready - 1);
18190
18191 return cached_can_issue_more;
18192 }
18193
18194 /* Return whether the presence of INSN causes a dispatch group termination
18195 of group WHICH_GROUP.
18196
18197 If WHICH_GROUP == current_group, this function will return true if INSN
18198 causes the termination of the current group (i.e., the dispatch group to
18199 which INSN belongs). This means that INSN will be the last insn in the
18200 group it belongs to.
18201
18202 If WHICH_GROUP == previous_group, this function will return true if INSN
18203 causes the termination of the previous group (i.e., the dispatch group that
18204 precedes the group to which INSN belongs). This means that INSN will be
18205 the first insn in the group it belongs to. */
18206
18207 static bool
18208 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18209 {
18210 bool first, last;
18211
18212 if (! insn)
18213 return false;
18214
18215 first = insn_must_be_first_in_group (insn);
18216 last = insn_must_be_last_in_group (insn);
18217
18218 if (first && last)
18219 return true;
18220
18221 if (which_group == current_group)
18222 return last;
18223 else if (which_group == previous_group)
18224 return first;
18225
18226 return false;
18227 }
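/* For example (illustrative): on POWER6 an isync is listed as both
   first-in-group and last-in-group by the tables below, so FIRST and
   LAST are both true and this function returns true regardless of
   WHICH_GROUP.  */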
18228
18229
18230 static bool
18231 insn_must_be_first_in_group (rtx_insn *insn)
18232 {
18233 enum attr_type type;
18234
18235 if (!insn
18236 || NOTE_P (insn)
18237 || DEBUG_INSN_P (insn)
18238 || GET_CODE (PATTERN (insn)) == USE
18239 || GET_CODE (PATTERN (insn)) == CLOBBER)
18240 return false;
18241
18242 switch (rs6000_tune)
18243 {
18244 case PROCESSOR_POWER5:
18245 if (is_cracked_insn (insn))
18246 return true;
18247 /* FALLTHRU */
18248 case PROCESSOR_POWER4:
18249 if (is_microcoded_insn (insn))
18250 return true;
18251
18252 if (!rs6000_sched_groups)
18253 return false;
18254
18255 type = get_attr_type (insn);
18256
18257 switch (type)
18258 {
18259 case TYPE_MFCR:
18260 case TYPE_MFCRF:
18261 case TYPE_MTCR:
18262 case TYPE_CR_LOGICAL:
18263 case TYPE_MTJMPR:
18264 case TYPE_MFJMPR:
18265 case TYPE_DIV:
18266 case TYPE_LOAD_L:
18267 case TYPE_STORE_C:
18268 case TYPE_ISYNC:
18269 case TYPE_SYNC:
18270 return true;
18271 default:
18272 break;
18273 }
18274 break;
18275 case PROCESSOR_POWER6:
18276 type = get_attr_type (insn);
18277
18278 switch (type)
18279 {
18280 case TYPE_EXTS:
18281 case TYPE_CNTLZ:
18282 case TYPE_TRAP:
18283 case TYPE_MUL:
18284 case TYPE_INSERT:
18285 case TYPE_FPCOMPARE:
18286 case TYPE_MFCR:
18287 case TYPE_MTCR:
18288 case TYPE_MFJMPR:
18289 case TYPE_MTJMPR:
18290 case TYPE_ISYNC:
18291 case TYPE_SYNC:
18292 case TYPE_LOAD_L:
18293 case TYPE_STORE_C:
18294 return true;
18295 case TYPE_SHIFT:
18296 if (get_attr_dot (insn) == DOT_NO
18297 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18298 return true;
18299 else
18300 break;
18301 case TYPE_DIV:
18302 if (get_attr_size (insn) == SIZE_32)
18303 return true;
18304 else
18305 break;
18306 case TYPE_LOAD:
18307 case TYPE_STORE:
18308 case TYPE_FPLOAD:
18309 case TYPE_FPSTORE:
18310 if (get_attr_update (insn) == UPDATE_YES)
18311 return true;
18312 else
18313 break;
18314 default:
18315 break;
18316 }
18317 break;
18318 case PROCESSOR_POWER7:
18319 type = get_attr_type (insn);
18320
18321 switch (type)
18322 {
18323 case TYPE_CR_LOGICAL:
18324 case TYPE_MFCR:
18325 case TYPE_MFCRF:
18326 case TYPE_MTCR:
18327 case TYPE_DIV:
18328 case TYPE_ISYNC:
18329 case TYPE_LOAD_L:
18330 case TYPE_STORE_C:
18331 case TYPE_MFJMPR:
18332 case TYPE_MTJMPR:
18333 return true;
18334 case TYPE_MUL:
18335 case TYPE_SHIFT:
18336 case TYPE_EXTS:
18337 if (get_attr_dot (insn) == DOT_YES)
18338 return true;
18339 else
18340 break;
18341 case TYPE_LOAD:
18342 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18343 || get_attr_update (insn) == UPDATE_YES)
18344 return true;
18345 else
18346 break;
18347 case TYPE_STORE:
18348 case TYPE_FPLOAD:
18349 case TYPE_FPSTORE:
18350 if (get_attr_update (insn) == UPDATE_YES)
18351 return true;
18352 else
18353 break;
18354 default:
18355 break;
18356 }
18357 break;
18358 case PROCESSOR_POWER8:
18359 type = get_attr_type (insn);
18360
18361 switch (type)
18362 {
18363 case TYPE_CR_LOGICAL:
18364 case TYPE_MFCR:
18365 case TYPE_MFCRF:
18366 case TYPE_MTCR:
18367 case TYPE_SYNC:
18368 case TYPE_ISYNC:
18369 case TYPE_LOAD_L:
18370 case TYPE_STORE_C:
18371 case TYPE_VECSTORE:
18372 case TYPE_MFJMPR:
18373 case TYPE_MTJMPR:
18374 return true;
18375 case TYPE_SHIFT:
18376 case TYPE_EXTS:
18377 case TYPE_MUL:
18378 if (get_attr_dot (insn) == DOT_YES)
18379 return true;
18380 else
18381 break;
18382 case TYPE_LOAD:
18383 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18384 || get_attr_update (insn) == UPDATE_YES)
18385 return true;
18386 else
18387 break;
18388 case TYPE_STORE:
18389 if (get_attr_update (insn) == UPDATE_YES
18390 && get_attr_indexed (insn) == INDEXED_YES)
18391 return true;
18392 else
18393 break;
18394 default:
18395 break;
18396 }
18397 break;
18398 default:
18399 break;
18400 }
18401
18402 return false;
18403 }
18404
18405 static bool
18406 insn_must_be_last_in_group (rtx_insn *insn)
18407 {
18408 enum attr_type type;
18409
18410 if (!insn
18411 || NOTE_P (insn)
18412 || DEBUG_INSN_P (insn)
18413 || GET_CODE (PATTERN (insn)) == USE
18414 || GET_CODE (PATTERN (insn)) == CLOBBER)
18415 return false;
18416
18417 switch (rs6000_tune)
      {
18418 case PROCESSOR_POWER4:
18419 case PROCESSOR_POWER5:
18420 if (is_microcoded_insn (insn))
18421 return true;
18422
18423 if (is_branch_slot_insn (insn))
18424 return true;
18425
18426 break;
18427 case PROCESSOR_POWER6:
18428 type = get_attr_type (insn);
18429
18430 switch (type)
18431 {
18432 case TYPE_EXTS:
18433 case TYPE_CNTLZ:
18434 case TYPE_TRAP:
18435 case TYPE_MUL:
18436 case TYPE_FPCOMPARE:
18437 case TYPE_MFCR:
18438 case TYPE_MTCR:
18439 case TYPE_MFJMPR:
18440 case TYPE_MTJMPR:
18441 case TYPE_ISYNC:
18442 case TYPE_SYNC:
18443 case TYPE_LOAD_L:
18444 case TYPE_STORE_C:
18445 return true;
18446 case TYPE_SHIFT:
18447 if (get_attr_dot (insn) == DOT_NO
18448 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18449 return true;
18450 else
18451 break;
18452 case TYPE_DIV:
18453 if (get_attr_size (insn) == SIZE_32)
18454 return true;
18455 else
18456 break;
18457 default:
18458 break;
18459 }
18460 break;
18461 case PROCESSOR_POWER7:
18462 type = get_attr_type (insn);
18463
18464 switch (type)
18465 {
18466 case TYPE_ISYNC:
18467 case TYPE_SYNC:
18468 case TYPE_LOAD_L:
18469 case TYPE_STORE_C:
18470 return true;
18471 case TYPE_LOAD:
18472 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18473 && get_attr_update (insn) == UPDATE_YES)
18474 return true;
18475 else
18476 break;
18477 case TYPE_STORE:
18478 if (get_attr_update (insn) == UPDATE_YES
18479 && get_attr_indexed (insn) == INDEXED_YES)
18480 return true;
18481 else
18482 break;
18483 default:
18484 break;
18485 }
18486 break;
18487 case PROCESSOR_POWER8:
18488 type = get_attr_type (insn);
18489
18490 switch (type)
18491 {
18492 case TYPE_MFCR:
18493 case TYPE_MTCR:
18494 case TYPE_ISYNC:
18495 case TYPE_SYNC:
18496 case TYPE_LOAD_L:
18497 case TYPE_STORE_C:
18498 return true;
18499 case TYPE_LOAD:
18500 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18501 && get_attr_update (insn) == UPDATE_YES)
18502 return true;
18503 else
18504 break;
18505 case TYPE_STORE:
18506 if (get_attr_update (insn) == UPDATE_YES
18507 && get_attr_indexed (insn) == INDEXED_YES)
18508 return true;
18509 else
18510 break;
18511 default:
18512 break;
18513 }
18514 break;
18515 default:
18516 break;
18517 }
18518
18519 return false;
18520 }
18521
18522 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
18523 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
18524
18525 static bool
18526 is_costly_group (rtx *group_insns, rtx next_insn)
18527 {
18528 int i;
18529 int issue_rate = rs6000_issue_rate ();
18530
18531 for (i = 0; i < issue_rate; i++)
18532 {
18533 sd_iterator_def sd_it;
18534 dep_t dep;
18535 rtx insn = group_insns[i];
18536
18537 if (!insn)
18538 continue;
18539
18540 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
18541 {
18542 rtx next = DEP_CON (dep);
18543
18544 if (next == next_insn
18545 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
18546 return true;
18547 }
18548 }
18549
18550 return false;
18551 }
18552
18553 /* Utility of the function redefine_groups.
18554 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
18555 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
18556 to keep it "far" (in a separate group) from GROUP_INSNS, following
18557 one of the following schemes, depending on the value of the flag
18558 -minsert-sched-nops=X:
18559 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
18560 in order to force NEXT_INSN into a separate group.
18561 (2) X < sched_finish_regroup_exact: insert exactly X nops.
18562 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
18563 insertion (has a group just ended, how many vacant issue slots remain in the
18564 last group, and how many dispatch groups were encountered so far). */
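/* For example (hypothetical counts, issue_rate == 5): with
   -minsert-sched-nops=2 exactly two nops are emitted before
   NEXT_INSN, whereas with -minsert-sched-nops=regroup_exact enough
   nops are emitted to exhaust the remaining issue slots so that
   NEXT_INSN is forced to start a new dispatch group.  */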
18565
18566 static int
18567 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
18568 rtx_insn *next_insn, bool *group_end, int can_issue_more,
18569 int *group_count)
18570 {
18571 rtx nop;
18572 bool force;
18573 int issue_rate = rs6000_issue_rate ();
18574 bool end = *group_end;
18575 int i;
18576
18577 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
18578 return can_issue_more;
18579
18580 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
18581 return can_issue_more;
18582
18583 force = is_costly_group (group_insns, next_insn);
18584 if (!force)
18585 return can_issue_more;
18586
18587 if (sched_verbose > 6)
18588 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
18589 *group_count ,can_issue_more);
18590
18591 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
18592 {
18593 if (*group_end)
18594 can_issue_more = 0;
18595
18596 /* Since only a branch can be issued in the last issue_slot, it is
18597 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
18598 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
18599 in this case the last nop will start a new group and the branch
18600 will be forced to the new group. */
18601 if (can_issue_more && !is_branch_slot_insn (next_insn))
18602 can_issue_more--;
18603
18604 /* Do we have a special group ending nop? */
18605 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
18606 || rs6000_tune == PROCESSOR_POWER8)
18607 {
18608 nop = gen_group_ending_nop ();
18609 emit_insn_before (nop, next_insn);
18610 can_issue_more = 0;
18611 }
18612 else
18613 while (can_issue_more > 0)
18614 {
18615 nop = gen_nop ();
18616 emit_insn_before (nop, next_insn);
18617 can_issue_more--;
18618 }
18619
18620 *group_end = true;
18621 return 0;
18622 }
18623
18624 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
18625 {
18626 int n_nops = rs6000_sched_insert_nops;
18627
18628 /* Nops can't be issued from the branch slot, so the effective
18629 issue_rate for nops is 'issue_rate - 1'. */
18630 if (can_issue_more == 0)
18631 can_issue_more = issue_rate;
18632 can_issue_more--;
18633 if (can_issue_more == 0)
18634 {
18635 can_issue_more = issue_rate - 1;
18636 (*group_count)++;
18637 end = true;
18638 for (i = 0; i < issue_rate; i++)
18639 {
18640 group_insns[i] = 0;
18641 }
18642 }
18643
18644 while (n_nops > 0)
18645 {
18646 nop = gen_nop ();
18647 emit_insn_before (nop, next_insn);
18648 if (can_issue_more == issue_rate - 1) /* new group begins */
18649 end = false;
18650 can_issue_more--;
18651 if (can_issue_more == 0)
18652 {
18653 can_issue_more = issue_rate - 1;
18654 (*group_count)++;
18655 end = true;
18656 for (i = 0; i < issue_rate; i++)
18657 {
18658 group_insns[i] = 0;
18659 }
18660 }
18661 n_nops--;
18662 }
18663
18664 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
18665 can_issue_more++;
18666
18667 /* Is next_insn going to start a new group? */
18668 *group_end
18669 = (end
18670 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18671 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18672 || (can_issue_more < issue_rate
18673 && insn_terminates_group_p (next_insn, previous_group)));
18674 if (*group_end && end)
18675 (*group_count)--;
18676
18677 if (sched_verbose > 6)
18678 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
18679 *group_count, can_issue_more);
18680 return can_issue_more;
18681 }
18682
18683 return can_issue_more;
18684 }
18685
18686 /* This function tries to synch the dispatch groups that the compiler "sees"
18687 with the dispatch groups that the processor dispatcher is expected to
18688 form in practice. It tries to achieve this synchronization by forcing the
18689 estimated processor grouping on the compiler (as opposed to the function
18690 'pad_groups' which tries to force the scheduler's grouping on the processor).
18691
18692 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
18693 examines the (estimated) dispatch groups that will be formed by the processor
18694 dispatcher. It marks these group boundaries to reflect the estimated
18695 processor grouping, overriding the grouping that the scheduler had marked.
18696 Depending on the value of the flag '-minsert-sched-nops' this function can
18697 force certain insns into separate groups or force a certain distance between
18698 them by inserting nops, for example, if there exists a "costly dependence"
18699 between the insns.
18700
18701 The function estimates the group boundaries that the processor will form as
18702 follows: It keeps track of how many vacant issue slots are available after
18703 each insn. A subsequent insn will start a new group if one of the following
18704 4 cases applies:
18705 - no more vacant issue slots remain in the current dispatch group.
18706 - only the last issue slot, which is the branch slot, is vacant, but the next
18707 insn is not a branch.
18708 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
18709 which means that a cracked insn (which occupies two issue slots) can't be
18710 issued in this group.
18711 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
18712 start a new group (see the worked example below). */
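/* A worked example of the cases above (illustrative, assuming
   issue_rate == 5): if three insns have been placed, only two
   slots remain and one of them is the branch slot, so a cracked
   insn (which needs two slots) starts a new group; once only the
   branch slot is left, any non-branch insn starts a new group.  */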
18713
18714 static int
18715 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18716 rtx_insn *tail)
18717 {
18718 rtx_insn *insn, *next_insn;
18719 int issue_rate;
18720 int can_issue_more;
18721 int slot, i;
18722 bool group_end;
18723 int group_count = 0;
18724 rtx *group_insns;
18725
18726 /* Initialize. */
18727 issue_rate = rs6000_issue_rate ();
18728 group_insns = XALLOCAVEC (rtx, issue_rate);
18729 for (i = 0; i < issue_rate; i++)
18730 {
18731 group_insns[i] = 0;
18732 }
18733 can_issue_more = issue_rate;
18734 slot = 0;
18735 insn = get_next_active_insn (prev_head_insn, tail);
18736 group_end = false;
18737
18738 while (insn != NULL_RTX)
18739 {
18740 slot = (issue_rate - can_issue_more);
18741 group_insns[slot] = insn;
18742 can_issue_more =
18743 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18744 if (insn_terminates_group_p (insn, current_group))
18745 can_issue_more = 0;
18746
18747 next_insn = get_next_active_insn (insn, tail);
18748 if (next_insn == NULL_RTX)
18749 return group_count + 1;
18750
18751 /* Is next_insn going to start a new group? */
18752 group_end
18753 = (can_issue_more == 0
18754 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18755 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18756 || (can_issue_more < issue_rate
18757 && insn_terminates_group_p (next_insn, previous_group)));
18758
18759 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
18760 next_insn, &group_end, can_issue_more,
18761 &group_count);
18762
18763 if (group_end)
18764 {
18765 group_count++;
18766 can_issue_more = 0;
18767 for (i = 0; i < issue_rate; i++)
18768 {
18769 group_insns[i] = 0;
18770 }
18771 }
18772
18773 if (GET_MODE (next_insn) == TImode && can_issue_more)
18774 PUT_MODE (next_insn, VOIDmode);
18775 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
18776 PUT_MODE (next_insn, TImode);
18777
18778 insn = next_insn;
18779 if (can_issue_more == 0)
18780 can_issue_more = issue_rate;
18781 } /* while */
18782
18783 return group_count;
18784 }
18785
18786 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
18787 dispatch group boundaries that the scheduler had marked. Pad with nops
18788 any dispatch groups which have vacant issue slots, in order to force the
18789 scheduler's grouping on the processor dispatcher. The function
18790 returns the number of dispatch groups found. */
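/* A worked example (illustrative, issue_rate == 5): if the
   scheduler ended a group leaving two vacant slots and the next
   insn is not a branch, a single nop is emitted -- the unfilled
   branch slot already excludes the next insn -- while a branch as
   next insn gets both slots padded.  */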
18791
18792 static int
18793 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18794 rtx_insn *tail)
18795 {
18796 rtx_insn *insn, *next_insn;
18797 rtx nop;
18798 int issue_rate;
18799 int can_issue_more;
18800 int group_end;
18801 int group_count = 0;
18802
18803 /* Initialize issue_rate. */
18804 issue_rate = rs6000_issue_rate ();
18805 can_issue_more = issue_rate;
18806
18807 insn = get_next_active_insn (prev_head_insn, tail);
18808 next_insn = get_next_active_insn (insn, tail);
18809
18810 while (insn != NULL_RTX)
18811 {
18812 can_issue_more =
18813 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18814
18815 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
18816
18817 if (next_insn == NULL_RTX)
18818 break;
18819
18820 if (group_end)
18821 {
18822 /* If the scheduler had marked group termination at this location
18823 (between insn and next_insn), and neither insn nor next_insn will
18824 force group termination, pad the group with nops to force group
18825 termination. */
18826 if (can_issue_more
18827 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
18828 && !insn_terminates_group_p (insn, current_group)
18829 && !insn_terminates_group_p (next_insn, previous_group))
18830 {
18831 if (!is_branch_slot_insn (next_insn))
18832 can_issue_more--;
18833
18834 while (can_issue_more)
18835 {
18836 nop = gen_nop ();
18837 emit_insn_before (nop, next_insn);
18838 can_issue_more--;
18839 }
18840 }
18841
18842 can_issue_more = issue_rate;
18843 group_count++;
18844 }
18845
18846 insn = next_insn;
18847 next_insn = get_next_active_insn (insn, tail);
18848 }
18849
18850 return group_count;
18851 }
18852
18853 /* We're beginning a new block. Initialize data structures as necessary. */
18854
18855 static void
18856 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
18857 int sched_verbose ATTRIBUTE_UNUSED,
18858 int max_ready ATTRIBUTE_UNUSED)
18859 {
18860 last_scheduled_insn = NULL;
18861 load_store_pendulum = 0;
18862 divide_cnt = 0;
18863 vec_pairing = 0;
18864 }
18865
18866 /* The following function is called at the end of scheduling BB.
18867 After reload, it inserts nops to enforce insn group bundling. */
18868
18869 static void
18870 rs6000_sched_finish (FILE *dump, int sched_verbose)
18871 {
18872 int n_groups;
18873
18874 if (sched_verbose)
18875 fprintf (dump, "=== Finishing schedule.\n");
18876
18877 if (reload_completed && rs6000_sched_groups)
18878 {
18879 /* Do not run the sched_finish hook when selective scheduling is enabled. */
18880 if (sel_sched_p ())
18881 return;
18882
18883 if (rs6000_sched_insert_nops == sched_finish_none)
18884 return;
18885
18886 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
18887 n_groups = pad_groups (dump, sched_verbose,
18888 current_sched_info->prev_head,
18889 current_sched_info->next_tail);
18890 else
18891 n_groups = redefine_groups (dump, sched_verbose,
18892 current_sched_info->prev_head,
18893 current_sched_info->next_tail);
18894
18895 if (sched_verbose >= 6)
18896 {
18897 fprintf (dump, "ngroups = %d\n", n_groups);
18898 print_rtl (dump, current_sched_info->prev_head);
18899 fprintf (dump, "Done finish_sched\n");
18900 }
18901 }
18902 }
18903
18904 struct rs6000_sched_context
18905 {
18906 short cached_can_issue_more;
18907 rtx_insn *last_scheduled_insn;
18908 int load_store_pendulum;
18909 int divide_cnt;
18910 int vec_pairing;
18911 };
18912
18913 typedef struct rs6000_sched_context rs6000_sched_context_def;
18914 typedef rs6000_sched_context_def *rs6000_sched_context_t;
18915
18916 /* Allocate storage for a new scheduling context. */
18917 static void *
18918 rs6000_alloc_sched_context (void)
18919 {
18920 return xmalloc (sizeof (rs6000_sched_context_def));
18921 }
18922
18923 /* If CLEAN_P is true, initialize _SC with clean data;
18924 otherwise, initialize it from the global context. */
18925 static void
18926 rs6000_init_sched_context (void *_sc, bool clean_p)
18927 {
18928 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18929
18930 if (clean_p)
18931 {
18932 sc->cached_can_issue_more = 0;
18933 sc->last_scheduled_insn = NULL;
18934 sc->load_store_pendulum = 0;
18935 sc->divide_cnt = 0;
18936 sc->vec_pairing = 0;
18937 }
18938 else
18939 {
18940 sc->cached_can_issue_more = cached_can_issue_more;
18941 sc->last_scheduled_insn = last_scheduled_insn;
18942 sc->load_store_pendulum = load_store_pendulum;
18943 sc->divide_cnt = divide_cnt;
18944 sc->vec_pairing = vec_pairing;
18945 }
18946 }
18947
18948 /* Sets the global scheduling context to the one pointed to by _SC. */
18949 static void
18950 rs6000_set_sched_context (void *_sc)
18951 {
18952 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18953
18954 gcc_assert (sc != NULL);
18955
18956 cached_can_issue_more = sc->cached_can_issue_more;
18957 last_scheduled_insn = sc->last_scheduled_insn;
18958 load_store_pendulum = sc->load_store_pendulum;
18959 divide_cnt = sc->divide_cnt;
18960 vec_pairing = sc->vec_pairing;
18961 }
18962
18963 /* Free _SC. */
18964 static void
18965 rs6000_free_sched_context (void *_sc)
18966 {
18967 gcc_assert (_sc != NULL);
18968
18969 free (_sc);
18970 }
18971
18972 static bool
18973 rs6000_sched_can_speculate_insn (rtx_insn *insn)
18974 {
18975 switch (get_attr_type (insn))
18976 {
18977 case TYPE_DIV:
18978 case TYPE_SDIV:
18979 case TYPE_DDIV:
18980 case TYPE_VECDIV:
18981 case TYPE_SSQRT:
18982 case TYPE_DSQRT:
18983 return false;
18984
18985 default:
18986 return true;
18987 }
18988 }
18989 \f
18990 /* Length in units of the trampoline for entering a nested function. */
18991
18992 int
18993 rs6000_trampoline_size (void)
18994 {
18995 int ret = 0;
18996
18997 switch (DEFAULT_ABI)
18998 {
18999 default:
19000 gcc_unreachable ();
19001
19002 case ABI_AIX:
19003 ret = (TARGET_32BIT) ? 12 : 24;
19004 break;
19005
19006 case ABI_ELFv2:
19007 gcc_assert (!TARGET_32BIT);
19008 ret = 32;
19009 break;
19010
19011 case ABI_DARWIN:
19012 case ABI_V4:
19013 ret = (TARGET_32BIT) ? 40 : 48;
19014 break;
19015 }
19016
19017 return ret;
19018 }
19019
19020 /* Emit RTL insns to initialize the variable parts of a trampoline.
19021 FNADDR is an RTX for the address of the function's pure code.
19022 CXT is an RTX for the static chain value for the function. */
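/* For the AIX ABI the code below lays the trampoline out as a
   3-word function descriptor (sketch, matching the 12/24-byte
   sizes reported by rs6000_trampoline_size):
     word 0: entry address, copied from FNADDR's descriptor
     word 1: TOC pointer, copied from FNADDR's descriptor
     word 2: static chain value (CXT)  */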
19023
19024 static void
19025 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
19026 {
19027 int regsize = (TARGET_32BIT) ? 4 : 8;
19028 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19029 rtx ctx_reg = force_reg (Pmode, cxt);
19030 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19031
19032 switch (DEFAULT_ABI)
19033 {
19034 default:
19035 gcc_unreachable ();
19036
19037 /* Under AIX, just build the 3-word function descriptor. */
19038 case ABI_AIX:
19039 {
19040 rtx fnmem, fn_reg, toc_reg;
19041
19042 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19043 error ("you cannot take the address of a nested function if you use "
19044 "the %qs option", "-mno-pointers-to-nested-functions");
19045
19046 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
19047 fn_reg = gen_reg_rtx (Pmode);
19048 toc_reg = gen_reg_rtx (Pmode);
19049
19050 /* Macro to shorten the code expansions below. */
19051 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19052
19053 m_tramp = replace_equiv_address (m_tramp, addr);
19054
19055 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
19056 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
19057 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
19058 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
19059 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
19060
19061 # undef MEM_PLUS
19062 }
19063 break;
19064
19065 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
19066 case ABI_ELFv2:
19067 case ABI_DARWIN:
19068 case ABI_V4:
19069 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
19070 LCT_NORMAL, VOIDmode,
19071 addr, Pmode,
19072 GEN_INT (rs6000_trampoline_size ()), SImode,
19073 fnaddr, Pmode,
19074 ctx_reg, Pmode);
19075 break;
19076 }
19077 }
19078
19079 \f
19080 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19081 identifier as an argument, so the front end shouldn't look it up. */
19082
19083 static bool
19084 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19085 {
19086 return is_attribute_p ("altivec", attr_id);
19087 }
19088
19089 /* Handle the "altivec" attribute. The attribute may have
19090 arguments as follows:
19091
19092 __attribute__((altivec(vector__)))
19093 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19094 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19095
19096 and may appear more than once (e.g., 'vector bool char') in a
19097 given declaration. */
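/* For instance, a declaration such as

     __attribute__((altivec(vector__))) int vi;

   gives VI the type V4SI below; the keyword-style spellings
   ('vector', '__vector', etc.) are assumed to expand to these
   attribute forms in altivec.h.  */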
19098
19099 static tree
19100 rs6000_handle_altivec_attribute (tree *node,
19101 tree name ATTRIBUTE_UNUSED,
19102 tree args,
19103 int flags ATTRIBUTE_UNUSED,
19104 bool *no_add_attrs)
19105 {
19106 tree type = *node, result = NULL_TREE;
19107 machine_mode mode;
19108 int unsigned_p;
19109 char altivec_type
19110 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19111 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19112 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19113 : '?');
19114
19115 while (POINTER_TYPE_P (type)
19116 || TREE_CODE (type) == FUNCTION_TYPE
19117 || TREE_CODE (type) == METHOD_TYPE
19118 || TREE_CODE (type) == ARRAY_TYPE)
19119 type = TREE_TYPE (type);
19120
19121 mode = TYPE_MODE (type);
19122
19123 /* Check for invalid AltiVec type qualifiers. */
19124 if (type == long_double_type_node)
19125 error ("use of %<long double%> in AltiVec types is invalid");
19126 else if (type == boolean_type_node)
19127 error ("use of boolean types in AltiVec types is invalid");
19128 else if (TREE_CODE (type) == COMPLEX_TYPE)
19129 error ("use of %<complex%> in AltiVec types is invalid");
19130 else if (DECIMAL_FLOAT_MODE_P (mode))
19131 error ("use of decimal floating point types in AltiVec types is invalid");
19132 else if (!TARGET_VSX)
19133 {
19134 if (type == long_unsigned_type_node || type == long_integer_type_node)
19135 {
19136 if (TARGET_64BIT)
19137 error ("use of %<long%> in AltiVec types is invalid for "
19138 "64-bit code without %qs", "-mvsx");
19139 else if (rs6000_warn_altivec_long)
19140 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19141 "use %<int%>");
19142 }
19143 else if (type == long_long_unsigned_type_node
19144 || type == long_long_integer_type_node)
19145 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19146 "-mvsx");
19147 else if (type == double_type_node)
19148 error ("use of %<double%> in AltiVec types is invalid without %qs",
19149 "-mvsx");
19150 }
19151
19152 switch (altivec_type)
19153 {
19154 case 'v':
19155 unsigned_p = TYPE_UNSIGNED (type);
19156 switch (mode)
19157 {
19158 case E_TImode:
19159 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19160 break;
19161 case E_DImode:
19162 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19163 break;
19164 case E_SImode:
19165 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19166 break;
19167 case E_HImode:
19168 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19169 break;
19170 case E_QImode:
19171 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19172 break;
19173 case E_SFmode: result = V4SF_type_node; break;
19174 case E_DFmode: result = V2DF_type_node; break;
19175 /* If the user says 'vector int bool', we may be handed the 'bool'
19176 attribute _before_ the 'vector' attribute, and so select the
19177 proper type in the 'b' case below. */
19178 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19179 case E_V2DImode: case E_V2DFmode:
19180 result = type; break;
19181 default: break;
19182 }
19183 break;
19184 case 'b':
19185 switch (mode)
19186 {
19187 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19188 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19189 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19190 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node; break;
19191 default: break;
19192 }
19193 break;
19194 case 'p':
19195 switch (mode)
19196 {
19197 case E_V8HImode: result = pixel_V8HI_type_node; break;
19198 default: break;
19199 }
19200 default: break;
19201 }
19202
19203 /* Propagate qualifiers attached to the element type
19204 onto the vector type. */
19205 if (result && result != type && TYPE_QUALS (type))
19206 result = build_qualified_type (result, TYPE_QUALS (type));
19207
19208 *no_add_attrs = true; /* No need to hang on to the attribute. */
19209
19210 if (result)
19211 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19212
19213 return NULL_TREE;
19214 }
19215
19216 /* AltiVec defines five built-in scalar types that serve as vector
19217 elements; we must teach the compiler how to mangle them. The 128-bit
19218 floating point mangling is target-specific as well. */
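/* For example, per the table below, a parameter of the AltiVec
   'pixel' element type mangles as "u7__pixel" (illustrative; the
   enclosing vector types add their own encoding).  */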
19219
19220 static const char *
19221 rs6000_mangle_type (const_tree type)
19222 {
19223 type = TYPE_MAIN_VARIANT (type);
19224
19225 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19226 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
19227 return NULL;
19228
19229 if (type == bool_char_type_node) return "U6__boolc";
19230 if (type == bool_short_type_node) return "U6__bools";
19231 if (type == pixel_type_node) return "u7__pixel";
19232 if (type == bool_int_type_node) return "U6__booli";
19233 if (type == bool_long_long_type_node) return "U6__boolx";
19234
19235 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19236 return "g";
19237 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19238 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19239
19240 /* For all other types, use the default mangling. */
19241 return NULL;
19242 }
19243
19244 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19245 struct attribute_spec.handler. */
19246
19247 static tree
19248 rs6000_handle_longcall_attribute (tree *node, tree name,
19249 tree args ATTRIBUTE_UNUSED,
19250 int flags ATTRIBUTE_UNUSED,
19251 bool *no_add_attrs)
19252 {
19253 if (TREE_CODE (*node) != FUNCTION_TYPE
19254 && TREE_CODE (*node) != FIELD_DECL
19255 && TREE_CODE (*node) != TYPE_DECL)
19256 {
19257 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19258 name);
19259 *no_add_attrs = true;
19260 }
19261
19262 return NULL_TREE;
19263 }
19264
19265 /* Set longcall attributes on all functions declared when
19266 rs6000_default_long_calls is true. */
19267 static void
19268 rs6000_set_default_type_attributes (tree type)
19269 {
19270 if (rs6000_default_long_calls
19271 && (TREE_CODE (type) == FUNCTION_TYPE
19272 || TREE_CODE (type) == METHOD_TYPE))
19273 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19274 NULL_TREE,
19275 TYPE_ATTRIBUTES (type));
19276
19277 #if TARGET_MACHO
19278 darwin_set_default_type_attributes (type);
19279 #endif
19280 }
19281
19282 /* Return a reference suitable for calling a function with the
19283 longcall attribute. */
19284
19285 static rtx
19286 rs6000_longcall_ref (rtx call_ref, rtx arg)
19287 {
19288 /* System V adds '.' to the internal name, so skip any leading periods. */
19289 const char *call_name = XSTR (call_ref, 0);
19290 if (*call_name == '.')
19291 {
19292 while (*call_name == '.')
19293 call_name++;
19294
19295 tree node = get_identifier (call_name);
19296 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
19297 }
19298
19299 if (TARGET_PLTSEQ)
19300 {
19301 rtx base = const0_rtx;
19302 int regno = 12;
19303 if (rs6000_pcrel_p (cfun))
19304 {
19305 rtx reg = gen_rtx_REG (Pmode, regno);
19306 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
19307 gen_rtvec (3, base, call_ref, arg),
19308 UNSPECV_PLT_PCREL);
19309 emit_insn (gen_rtx_SET (reg, u));
19310 return reg;
19311 }
19312
19313 if (DEFAULT_ABI == ABI_ELFv2)
19314 base = gen_rtx_REG (Pmode, TOC_REGISTER);
19315 else
19316 {
19317 if (flag_pic)
19318 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19319 regno = 11;
19320 }
19321 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
19322 may be used by a function global entry point. For SysV4, r11
19323 is used by __glink_PLTresolve lazy resolver entry. */
19324 rtx reg = gen_rtx_REG (Pmode, regno);
19325 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19326 UNSPEC_PLT16_HA);
19327 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
19328 gen_rtvec (3, reg, call_ref, arg),
19329 UNSPECV_PLT16_LO);
19330 emit_insn (gen_rtx_SET (reg, hi));
19331 emit_insn (gen_rtx_SET (reg, lo));
19332 return reg;
19333 }
19334
19335 return force_reg (Pmode, call_ref);
19336 }
19337 \f
19338 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
19339 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
19340 #endif
19341
19342 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19343 struct attribute_spec.handler. */
19344 static tree
19345 rs6000_handle_struct_attribute (tree *node, tree name,
19346 tree args ATTRIBUTE_UNUSED,
19347 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19348 {
19349 tree *type = NULL;
19350 if (DECL_P (*node))
19351 {
19352 if (TREE_CODE (*node) == TYPE_DECL)
19353 type = &TREE_TYPE (*node);
19354 }
19355 else
19356 type = node;
19357
19358 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19359 || TREE_CODE (*type) == UNION_TYPE)))
19360 {
19361 warning (OPT_Wattributes, "%qE attribute ignored", name);
19362 *no_add_attrs = true;
19363 }
19364
19365 else if ((is_attribute_p ("ms_struct", name)
19366 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19367 || ((is_attribute_p ("gcc_struct", name)
19368 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19369 {
19370 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
19371 name);
19372 *no_add_attrs = true;
19373 }
19374
19375 return NULL_TREE;
19376 }
19377
19378 static bool
19379 rs6000_ms_bitfield_layout_p (const_tree record_type)
19380 {
19381 return (TARGET_USE_MS_BITFIELD_LAYOUT
19382 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19383 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19384 }
19385 \f
19386 #ifdef USING_ELFOS_H
19387
19388 /* A get_unnamed_section callback, used for switching to toc_section. */
19389
19390 static void
19391 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
19392 {
19393 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19394 && TARGET_MINIMAL_TOC)
19395 {
19396 if (!toc_initialized)
19397 {
19398 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19399 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19400 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
19401 fprintf (asm_out_file, "\t.tc ");
19402 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
19403 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19404 fprintf (asm_out_file, "\n");
19405
19406 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19407 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19408 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19409 fprintf (asm_out_file, " = .+32768\n");
19410 toc_initialized = 1;
19411 }
19412 else
19413 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19414 }
19415 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19416 {
19417 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19418 if (!toc_initialized)
19419 {
19420 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19421 toc_initialized = 1;
19422 }
19423 }
19424 else
19425 {
19426 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19427 if (!toc_initialized)
19428 {
19429 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19430 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19431 fprintf (asm_out_file, " = .+32768\n");
19432 toc_initialized = 1;
19433 }
19434 }
19435 }
19436
19437 /* Implement TARGET_ASM_INIT_SECTIONS. */
19438
19439 static void
19440 rs6000_elf_asm_init_sections (void)
19441 {
19442 toc_section
19443 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
19444
19445 sdata2_section
19446 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
19447 SDATA2_SECTION_ASM_OP);
19448 }
19449
19450 /* Implement TARGET_SELECT_RTX_SECTION. */
19451
19452 static section *
19453 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
19454 unsigned HOST_WIDE_INT align)
19455 {
19456 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
19457 return toc_section;
19458 else
19459 return default_elf_select_rtx_section (mode, x, align);
19460 }
19461 \f
19462 /* For a SYMBOL_REF, set generic flags and then perform some
19463 target-specific processing.
19464
19465 When the AIX ABI is requested on a non-AIX system, replace the
19466 function name with the real name (with a leading .) rather than the
19467 function descriptor name. This saves a lot of overriding code to
19468 read the prefixes. */
19469
19470 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
19471 static void
19472 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
19473 {
19474 default_encode_section_info (decl, rtl, first);
19475
19476 if (first
19477 && TREE_CODE (decl) == FUNCTION_DECL
19478 && !TARGET_AIX
19479 && DEFAULT_ABI == ABI_AIX)
19480 {
19481 rtx sym_ref = XEXP (rtl, 0);
19482 size_t len = strlen (XSTR (sym_ref, 0));
19483 char *str = XALLOCAVEC (char, len + 2);
19484 str[0] = '.';
19485 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
19486 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
19487 }
19488 }
19489
19490 static inline bool
19491 compare_section_name (const char *section, const char *templ)
19492 {
19493 int len;
19494
19495 len = strlen (templ);
19496 return (strncmp (section, templ, len) == 0
19497 && (section[len] == 0 || section[len] == '.'));
19498 }
19499
19500 bool
19501 rs6000_elf_in_small_data_p (const_tree decl)
19502 {
19503 if (rs6000_sdata == SDATA_NONE)
19504 return false;
19505
19506 /* We want to merge strings, so we never consider them small data. */
19507 if (TREE_CODE (decl) == STRING_CST)
19508 return false;
19509
19510 /* Functions are never in the small data area. */
19511 if (TREE_CODE (decl) == FUNCTION_DECL)
19512 return false;
19513
19514 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
19515 {
19516 const char *section = DECL_SECTION_NAME (decl);
19517 if (compare_section_name (section, ".sdata")
19518 || compare_section_name (section, ".sdata2")
19519 || compare_section_name (section, ".gnu.linkonce.s")
19520 || compare_section_name (section, ".sbss")
19521 || compare_section_name (section, ".sbss2")
19522 || compare_section_name (section, ".gnu.linkonce.sb")
19523 || strcmp (section, ".PPC.EMB.sdata0") == 0
19524 || strcmp (section, ".PPC.EMB.sbss0") == 0)
19525 return true;
19526 }
19527 else
19528 {
19529 /* If we are told not to put readonly data in sdata, then don't. */
19530 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
19531 && !rs6000_readonly_in_sdata)
19532 return false;
19533
19534 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
19535
19536 if (size > 0
19537 && size <= g_switch_value
19538 /* If it's not public, and we're not going to reference it there,
19539 there's no need to put it in the small data section. */
19540 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
19541 return true;
19542 }
19543
19544 return false;
19545 }
19546
19547 #endif /* USING_ELFOS_H */
19548 \f
19549 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
19550
19551 static bool
19552 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
19553 {
19554 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
19555 }
19556
19557 /* Do not place thread-local symbols refs in the object blocks. */
19558
19559 static bool
19560 rs6000_use_blocks_for_decl_p (const_tree decl)
19561 {
19562 return !DECL_THREAD_LOCAL_P (decl);
19563 }
19564 \f
19565 /* Return a REG that occurs in ADDR with coefficient 1.
19566 ADDR can be effectively incremented by incrementing REG.
19567
19568 r0 is special and we must not select it as an address
19569 register by this routine since our caller will try to
19570 increment the returned register via an "la" instruction. */
19571
19572 rtx
19573 find_addr_reg (rtx addr)
19574 {
19575 while (GET_CODE (addr) == PLUS)
19576 {
19577 if (REG_P (XEXP (addr, 0))
19578 && REGNO (XEXP (addr, 0)) != 0)
19579 addr = XEXP (addr, 0);
19580 else if (REG_P (XEXP (addr, 1))
19581 && REGNO (XEXP (addr, 1)) != 0)
19582 addr = XEXP (addr, 1);
19583 else if (CONSTANT_P (XEXP (addr, 0)))
19584 addr = XEXP (addr, 1);
19585 else if (CONSTANT_P (XEXP (addr, 1)))
19586 addr = XEXP (addr, 0);
19587 else
19588 gcc_unreachable ();
19589 }
19590 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
19591 return addr;
19592 }
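/* For example (hypothetical RTL): given (plus (reg r9) (const_int 8))
   the loop above strips the constant and returns (reg r9); given
   (plus (reg r0) (reg r9)) it returns r9, since r0 cannot serve as
   the incremented base register.  */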
19593
19594 void
19595 rs6000_fatal_bad_address (rtx op)
19596 {
19597 fatal_insn ("bad address", op);
19598 }
19599
19600 #if TARGET_MACHO
19601
19602 vec<branch_island, va_gc> *branch_islands;
19603
19604 /* Remember to generate a branch island for far calls to the given
19605 function. */
19606
19607 static void
19608 add_compiler_branch_island (tree label_name, tree function_name,
19609 int line_number)
19610 {
19611 branch_island bi = {function_name, label_name, line_number};
19612 vec_safe_push (branch_islands, bi);
19613 }
19614
19615 /* NO_PREVIOUS_DEF checks the branch island list to see whether
19616 the function name is already there. */
19617
19618 static int
19619 no_previous_def (tree function_name)
19620 {
19621 branch_island *bi;
19622 unsigned ix;
19623
19624 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19625 if (function_name == bi->function_name)
19626 return 0;
19627 return 1;
19628 }
19629
19630 /* GET_PREV_LABEL gets the label name from the previous definition of
19631 the function. */
19632
19633 static tree
19634 get_prev_label (tree function_name)
19635 {
19636 branch_island *bi;
19637 unsigned ix;
19638
19639 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19640 if (function_name == bi->function_name)
19641 return bi->label_name;
19642 return NULL_TREE;
19643 }
19644
19645 /* Generate external symbol indirection stubs (PIC and non-PIC). */
19646
19647 void
19648 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19649 {
19650 unsigned int length;
19651 char *symbol_name, *lazy_ptr_name;
19652 char *local_label_0;
19653 static unsigned label = 0;
19654
19655 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19656 symb = (*targetm.strip_name_encoding) (symb);
19657
19658 length = strlen (symb);
19659 symbol_name = XALLOCAVEC (char, length + 32);
19660 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19661
19662 lazy_ptr_name = XALLOCAVEC (char, length + 32);
19663 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
19664
19665 if (MACHOPIC_PURE)
19666 {
19667 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
19668 fprintf (file, "\t.align 5\n");
19669
19670 fprintf (file, "%s:\n", stub);
19671 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19672
19673 label++;
19674 local_label_0 = XALLOCAVEC (char, 16);
19675 sprintf (local_label_0, "L%u$spb", label);
19676
19677 fprintf (file, "\tmflr r0\n");
19678 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
19679 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
19680 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
19681 lazy_ptr_name, local_label_0);
19682 fprintf (file, "\tmtlr r0\n");
19683 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
19684 (TARGET_64BIT ? "ldu" : "lwzu"),
19685 lazy_ptr_name, local_label_0);
19686 fprintf (file, "\tmtctr r12\n");
19687 fprintf (file, "\tbctr\n");
19688 }
19689 else /* mdynamic-no-pic or mkernel. */
19690 {
19691 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
19692 fprintf (file, "\t.align 4\n");
19693
19694 fprintf (file, "%s:\n", stub);
19695 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19696
19697 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
19698 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
19699 (TARGET_64BIT ? "ldu" : "lwzu"),
19700 lazy_ptr_name);
19701 fprintf (file, "\tmtctr r12\n");
19702 fprintf (file, "\tbctr\n");
19703 }
19704
19705 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19706 fprintf (file, "%s:\n", lazy_ptr_name);
19707 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19708 fprintf (file, "%sdyld_stub_binding_helper\n",
19709 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
19710 }
19711
19712 /* Legitimize PIC addresses. If the address is already
19713 position-independent, we return ORIG. Newly generated
19714 position-independent addresses go into a reg. This is REG if
19715 nonzero; otherwise we allocate register(s) as necessary. */
19716
19717 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
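/* SMALL_INT tests for a signed 16-bit value: e.g. INTVAL -0x8000
   biases to 0 (true) while INTVAL 0x8000 biases to 0x10000, which
   fails the '< 0x10000' comparison.  */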
19718
19719 rtx
19720 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
19721 rtx reg)
19722 {
19723 rtx base, offset;
19724
19725 if (reg == NULL && !reload_completed)
19726 reg = gen_reg_rtx (Pmode);
19727
19728 if (GET_CODE (orig) == CONST)
19729 {
19730 rtx reg_temp;
19731
19732 if (GET_CODE (XEXP (orig, 0)) == PLUS
19733 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
19734 return orig;
19735
19736 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
19737
19738 /* Use a different reg for the intermediate value, as
19739 it will be marked UNCHANGING. */
19740 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
19741 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
19742 Pmode, reg_temp);
19743 offset =
19744 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
19745 Pmode, reg);
19746
19747 if (CONST_INT_P (offset))
19748 {
19749 if (SMALL_INT (offset))
19750 return plus_constant (Pmode, base, INTVAL (offset));
19751 else if (!reload_completed)
19752 offset = force_reg (Pmode, offset);
19753 else
19754 {
19755 rtx mem = force_const_mem (Pmode, orig);
19756 return machopic_legitimize_pic_address (mem, Pmode, reg);
19757 }
19758 }
19759 return gen_rtx_PLUS (Pmode, base, offset);
19760 }
19761
19762 /* Fall back on generic machopic code. */
19763 return machopic_legitimize_pic_address (orig, mode, reg);
19764 }
19765
19766 /* Output a .machine directive for the Darwin assembler, and call
19767 the generic start_file routine. */
19768
19769 static void
19770 rs6000_darwin_file_start (void)
19771 {
19772 static const struct
19773 {
19774 const char *arg;
19775 const char *name;
19776 HOST_WIDE_INT if_set;
19777 } mapping[] = {
19778 { "ppc64", "ppc64", MASK_64BIT },
19779 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
19780 { "power4", "ppc970", 0 },
19781 { "G5", "ppc970", 0 },
19782 { "7450", "ppc7450", 0 },
19783 { "7400", "ppc7400", MASK_ALTIVEC },
19784 { "G4", "ppc7400", 0 },
19785 { "750", "ppc750", 0 },
19786 { "740", "ppc750", 0 },
19787 { "G3", "ppc750", 0 },
19788 { "604e", "ppc604e", 0 },
19789 { "604", "ppc604", 0 },
19790 { "603e", "ppc603", 0 },
19791 { "603", "ppc603", 0 },
19792 { "601", "ppc601", 0 },
19793 { NULL, "ppc", 0 } };
19794 const char *cpu_id = "";
19795 size_t i;
19796
19797 rs6000_file_start ();
19798 darwin_file_start ();
19799
19800 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
19801
19802 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
19803 cpu_id = rs6000_default_cpu;
19804
19805 if (global_options_set.x_rs6000_cpu_index)
19806 cpu_id = processor_target_table[rs6000_cpu_index].name;
19807
19808 /* Look through the mapping array. Pick the first name that either
19809 matches the argument, has a bit set in IF_SET that is also set
19810 in the target flags, or has a NULL name. */
19811
19812 i = 0;
19813 while (mapping[i].arg != NULL
19814 && strcmp (mapping[i].arg, cpu_id) != 0
19815 && (mapping[i].if_set & rs6000_isa_flags) == 0)
19816 i++;
19817
19818 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
19819 }
19820
19821 #endif /* TARGET_MACHO */
19822
19823 #if TARGET_ELF
19824 static int
19825 rs6000_elf_reloc_rw_mask (void)
19826 {
19827 if (flag_pic)
19828 return 3;
19829 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19830 return 2;
19831 else
19832 return 0;
19833 }
19834
19835 /* Record an element in the table of global constructors. SYMBOL is
19836 a SYMBOL_REF of the function to be called; PRIORITY is a number
19837 between 0 and MAX_INIT_PRIORITY.
19838
19839 This differs from default_named_section_asm_out_constructor in
19840 that we have special handling for -mrelocatable. */
19841
19842 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
19843 static void
19844 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
19845 {
19846 const char *section = ".ctors";
19847 char buf[18];
19848
19849 if (priority != DEFAULT_INIT_PRIORITY)
19850 {
19851 sprintf (buf, ".ctors.%.5u",
19852 /* Invert the numbering so the linker puts us in the proper
19853 order; constructors are run from right to left, and the
19854 linker sorts in increasing order. */
19855 MAX_INIT_PRIORITY - priority);
19856 section = buf;
19857 }
19858
19859 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19860 assemble_align (POINTER_SIZE);
19861
19862 if (DEFAULT_ABI == ABI_V4
19863 && (TARGET_RELOCATABLE || flag_pic > 1))
19864 {
19865 fputs ("\t.long (", asm_out_file);
19866 output_addr_const (asm_out_file, symbol);
19867 fputs (")@fixup\n", asm_out_file);
19868 }
19869 else
19870 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19871 }
19872
19873 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
19874 static void
19875 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
19876 {
19877 const char *section = ".dtors";
19878 char buf[18];
19879
19880 if (priority != DEFAULT_INIT_PRIORITY)
19881 {
19882 sprintf (buf, ".dtors.%.5u",
19883 /* Invert the numbering so the linker puts us in the proper
19884 order; destructors are run from left to right (the reverse
19885 of constructors), and the linker sorts in increasing order. */
19886 MAX_INIT_PRIORITY - priority);
19887 section = buf;
19888 }
19889
19890 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19891 assemble_align (POINTER_SIZE);
19892
19893 if (DEFAULT_ABI == ABI_V4
19894 && (TARGET_RELOCATABLE || flag_pic > 1))
19895 {
19896 fputs ("\t.long (", asm_out_file);
19897 output_addr_const (asm_out_file, symbol);
19898 fputs (")@fixup\n", asm_out_file);
19899 }
19900 else
19901 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19902 }
19903
19904 void
19905 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
19906 {
19907 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
19908 {
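/* For ELFv1, a function's global symbol names not code but a
   three-doubleword descriptor in .opd: the code entry address,
   the TOC base, and an environment word (the trailing 0), hence
   the ".size <name>,24" emitted below.  */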
19909 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
19910 ASM_OUTPUT_LABEL (file, name);
19911 fputs (DOUBLE_INT_ASM_OP, file);
19912 rs6000_output_function_entry (file, name);
19913 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
19914 if (DOT_SYMBOLS)
19915 {
19916 fputs ("\t.size\t", file);
19917 assemble_name (file, name);
19918 fputs (",24\n\t.type\t.", file);
19919 assemble_name (file, name);
19920 fputs (",@function\n", file);
19921 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
19922 {
19923 fputs ("\t.globl\t.", file);
19924 assemble_name (file, name);
19925 putc ('\n', file);
19926 }
19927 }
19928 else
19929 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19930 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19931 rs6000_output_function_entry (file, name);
19932 fputs (":\n", file);
19933 return;
19934 }
19935
19936 int uses_toc;
19937 if (DEFAULT_ABI == ABI_V4
19938 && (TARGET_RELOCATABLE || flag_pic > 1)
19939 && !TARGET_SECURE_PLT
19940 && (!constant_pool_empty_p () || crtl->profile)
19941 && (uses_toc = uses_TOC ()))
19942 {
19943 char buf[256];
19944
19945 if (uses_toc == 2)
19946 switch_to_other_text_partition ();
19947 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19948
19949 fprintf (file, "\t.long ");
19950 assemble_name (file, toc_label_name);
19951 need_toc_init = 1;
19952 putc ('-', file);
19953 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19954 assemble_name (file, buf);
19955 putc ('\n', file);
19956 if (uses_toc == 2)
19957 switch_to_other_text_partition ();
19958 }
19959
19960 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19961 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19962
19963 if (TARGET_CMODEL == CMODEL_LARGE
19964 && rs6000_global_entry_point_prologue_needed_p ())
19965 {
19966 char buf[256];
19967
19968 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19969
19970 fprintf (file, "\t.quad .TOC.-");
19971 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19972 assemble_name (file, buf);
19973 putc ('\n', file);
19974 }
19975
19976 if (DEFAULT_ABI == ABI_AIX)
19977 {
19978 const char *desc_name, *orig_name;
19979
19980 orig_name = (*targetm.strip_name_encoding) (name);
19981 desc_name = orig_name;
19982 while (*desc_name == '.')
19983 desc_name++;
19984
19985 if (TREE_PUBLIC (decl))
19986 fprintf (file, "\t.globl %s\n", desc_name);
19987
19988 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19989 fprintf (file, "%s:\n", desc_name);
19990 fprintf (file, "\t.long %s\n", orig_name);
19991 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
19992 fputs ("\t.long 0\n", file);
19993 fprintf (file, "\t.previous\n");
19994 }
19995 ASM_OUTPUT_LABEL (file, name);
19996 }
19997
19998 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
19999 static void
20000 rs6000_elf_file_end (void)
20001 {
20002 #ifdef HAVE_AS_GNU_ATTRIBUTE
20003 /* ??? The value emitted depends on options active at file end.
20004 Assume anyone using #pragma or attributes that might change
20005 options knows what they are doing. */
20006 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20007 && rs6000_passes_float)
20008 {
20009 int fp;
20010
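/* GNU attribute 4 (the Power ABI FP attribute) encodes the scalar
   float ABI in bits 0-1 (as set here: 1 = hard double, 2 = soft)
   and the long double ABI in bits 2-3 (1 = 128-bit IBM extended,
   2 = 64-bit, 3 = 128-bit IEEE), matching the values below.  */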
20011 if (TARGET_HARD_FLOAT)
20012 fp = 1;
20013 else
20014 fp = 2;
20015 if (rs6000_passes_long_double)
20016 {
20017 if (!TARGET_LONG_DOUBLE_128)
20018 fp |= 2 * 4;
20019 else if (TARGET_IEEEQUAD)
20020 fp |= 3 * 4;
20021 else
20022 fp |= 1 * 4;
20023 }
20024 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
20025 }
20026 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
20027 {
20028 if (rs6000_passes_vector)
20029 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
20030 (TARGET_ALTIVEC_ABI ? 2 : 1));
20031 if (rs6000_returns_struct)
20032 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20033 aix_struct_return ? 2 : 1);
20034 }
20035 #endif
20036 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20037 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20038 file_end_indicate_exec_stack ();
20039 #endif
20040
20041 if (flag_split_stack)
20042 file_end_indicate_split_stack ();
20043
20044 if (cpu_builtin_p)
20045 {
20046 /* We have expanded a CPU builtin, so we need to emit a reference to
20047 the special symbol that libc uses to declare that it supports the
20048 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
20049 switch_to_section (data_section);
20050 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20051 fprintf (asm_out_file, "\t%s %s\n",
20052 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20053 }
20054 }
20055 #endif
20056
20057 #if TARGET_XCOFF
20058
20059 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20060 #define HAVE_XCOFF_DWARF_EXTRAS 0
20061 #endif
20062
20063 static enum unwind_info_type
20064 rs6000_xcoff_debug_unwind_info (void)
20065 {
20066 return UI_NONE;
20067 }
20068
20069 static void
20070 rs6000_xcoff_asm_output_anchor (rtx symbol)
20071 {
20072 char buffer[100];
20073
20074 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
20075 SYMBOL_REF_BLOCK_OFFSET (symbol));
20076 fprintf (asm_out_file, "%s", SET_ASM_OP);
20077 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20078 fprintf (asm_out_file, ",");
20079 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20080 fprintf (asm_out_file, "\n");
20081 }
20082
20083 static void
20084 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20085 {
20086 fputs (GLOBAL_ASM_OP, stream);
20087 RS6000_OUTPUT_BASENAME (stream, name);
20088 putc ('\n', stream);
20089 }
20090
20091 /* A get_unnamed_section callback, used for read-only sections.
20092 DIRECTIVE points to the section string variable. */
20093
20094 static void
20095 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
20096 {
20097 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20098 *(const char *const *) directive,
20099 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20100 }
20101
20102 /* Likewise for read-write sections. */
20103
20104 static void
20105 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
20106 {
20107 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20108 *(const char *const *) directive,
20109 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20110 }
20111
20112 static void
20113 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20114 {
20115 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20116 *(const char *const *) directive,
20117 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20118 }
20119
20120 /* A get_unnamed_section callback, used for switching to toc_section. */
20121
20122 static void
20123 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20124 {
20125 if (TARGET_MINIMAL_TOC)
20126 {
20127 /* toc_section is always selected at least once from
20128 rs6000_xcoff_file_start, so this label is guaranteed
20129 to be defined exactly once in each file. */
20130 if (!toc_initialized)
20131 {
20132 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20133 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20134 toc_initialized = 1;
20135 }
20136 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20137 (TARGET_32BIT ? "" : ",3"));
20138 }
20139 else
20140 fputs ("\t.toc\n", asm_out_file);
20141 }
20142
20143 /* Implement TARGET_ASM_INIT_SECTIONS. */
20144
20145 static void
20146 rs6000_xcoff_asm_init_sections (void)
20147 {
20148 read_only_data_section
20149 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20150 &xcoff_read_only_section_name);
20151
20152 private_data_section
20153 = get_unnamed_section (SECTION_WRITE,
20154 rs6000_xcoff_output_readwrite_section_asm_op,
20155 &xcoff_private_data_section_name);
20156
20157 read_only_private_data_section
20158 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20159 &xcoff_private_rodata_section_name);
20160
20161 tls_data_section
20162 = get_unnamed_section (SECTION_TLS,
20163 rs6000_xcoff_output_tls_section_asm_op,
20164 &xcoff_tls_data_section_name);
20165
20166 tls_private_data_section
20167 = get_unnamed_section (SECTION_TLS,
20168 rs6000_xcoff_output_tls_section_asm_op,
20169 &xcoff_private_data_section_name);
20170
20171 toc_section
20172 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20173
20174 readonly_data_section = read_only_data_section;
20175 }
20176
20177 static int
20178 rs6000_xcoff_reloc_rw_mask (void)
20179 {
20180 return 3;
20181 }
20182
20183 static void
20184 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20185 tree decl ATTRIBUTE_UNUSED)
20186 {
20187 int smclass;
20188 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
20189
20190 if (flags & SECTION_EXCLUDE)
20191 smclass = 4;
20192 else if (flags & SECTION_DEBUG)
20193 {
20194 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20195 return;
20196 }
20197 else if (flags & SECTION_CODE)
20198 smclass = 0;
20199 else if (flags & SECTION_TLS)
20200 smclass = 3;
20201 else if (flags & SECTION_WRITE)
20202 smclass = 2;
20203 else
20204 smclass = 1;
20205
20206 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20207 (flags & SECTION_CODE) ? "." : "",
20208 name, suffix[smclass], flags & SECTION_ENTSIZE);
20209 }
20210
20211 #define IN_NAMED_SECTION(DECL) \
20212 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20213 && DECL_SECTION_NAME (DECL) != NULL)
20214
20215 static section *
20216 rs6000_xcoff_select_section (tree decl, int reloc,
20217 unsigned HOST_WIDE_INT align)
20218 {
20219 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20220 named section. */
20221 if (align > BIGGEST_ALIGNMENT)
20222 {
20223 resolve_unique_section (decl, reloc, true);
20224 if (IN_NAMED_SECTION (decl))
20225 return get_named_section (decl, NULL, reloc);
20226 }
20227
20228 if (decl_readonly_section (decl, reloc))
20229 {
20230 if (TREE_PUBLIC (decl))
20231 return read_only_data_section;
20232 else
20233 return read_only_private_data_section;
20234 }
20235 else
20236 {
20237 #if HAVE_AS_TLS
20238 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20239 {
20240 if (TREE_PUBLIC (decl))
20241 return tls_data_section;
20242 else if (bss_initializer_p (decl))
20243 {
20244 /* Convert to COMMON to emit in BSS. */
20245 DECL_COMMON (decl) = 1;
20246 return tls_comm_section;
20247 }
20248 else
20249 return tls_private_data_section;
20250 }
20251 else
20252 #endif
20253 if (TREE_PUBLIC (decl))
20254 return data_section;
20255 else
20256 return private_data_section;
20257 }
20258 }
20259
20260 static void
20261 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20262 {
20263 const char *name;
20264
20265 /* Use select_section for private data and uninitialized data with
20266 alignment <= BIGGEST_ALIGNMENT. */
20267 if (!TREE_PUBLIC (decl)
20268 || DECL_COMMON (decl)
20269 || (DECL_INITIAL (decl) == NULL_TREE
20270 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20271 || DECL_INITIAL (decl) == error_mark_node
20272 || (flag_zero_initialized_in_bss
20273 && initializer_zerop (DECL_INITIAL (decl))))
20274 return;
20275
20276 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20277 name = (*targetm.strip_name_encoding) (name);
20278 set_decl_section_name (decl, name);
20279 }
20280
20281 /* Select section for constant in constant pool.
20282
20283 On RS/6000, all constants are in the private read-only data area.
20284 However, if this is being placed in the TOC it must be output as a
20285 toc entry. */
20286
20287 static section *
20288 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20289 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20290 {
20291 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20292 return toc_section;
20293 else
20294 return read_only_private_data_section;
20295 }
20296
20297 /* Remove any trailing [DS] or the like from the symbol name. */
20298
20299 static const char *
20300 rs6000_xcoff_strip_name_encoding (const char *name)
20301 {
20302 size_t len;
20303 if (*name == '*')
20304 name++;
20305 len = strlen (name);
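/* A mapping-class suffix such as "[DS]" or "[RO]" is always four
   characters, so dropping the last four bytes removes it.  */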
20306 if (name[len - 1] == ']')
20307 return ggc_alloc_string (name, len - 4);
20308 else
20309 return name;
20310 }
20311
20312 /* Section attributes. AIX is always PIC. */
20313
20314 static unsigned int
20315 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20316 {
20317 unsigned int align;
20318 unsigned int flags = default_section_type_flags (decl, name, reloc);
20319
20320 /* Align to at least the minimum unit (word) size. */
20321 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
20322 align = MIN_UNITS_PER_WORD;
20323 else
20324 /* Increase alignment of large objects if not already stricter. */
20325 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
20326 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
20327 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
20328
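/* Store log2 of the alignment in the SECTION_ENTSIZE bits;
   rs6000_xcoff_asm_named_section above prints those bits back
   out as the csect alignment operand.  */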
20329 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
20330 }
20331
20332 /* Output at beginning of assembler file.
20333
20334 Initialize the section names for the RS/6000 at this point.
20335
20336 Specify filename, including full path, to assembler.
20337
20338 We want to go into the TOC section so at least one .toc will be emitted.
20339 Also, in order to output proper .bs/.es pairs, we need at least one static
20340 [RW] section emitted.
20341
20342 Finally, declare mcount when profiling to make the assembler happy. */
20343
20344 static void
20345 rs6000_xcoff_file_start (void)
20346 {
20347 rs6000_gen_section_name (&xcoff_bss_section_name,
20348 main_input_filename, ".bss_");
20349 rs6000_gen_section_name (&xcoff_private_data_section_name,
20350 main_input_filename, ".rw_");
20351 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
20352 main_input_filename, ".rop_");
20353 rs6000_gen_section_name (&xcoff_read_only_section_name,
20354 main_input_filename, ".ro_");
20355 rs6000_gen_section_name (&xcoff_tls_data_section_name,
20356 main_input_filename, ".tls_");
20357 rs6000_gen_section_name (&xcoff_tbss_section_name,
20358 main_input_filename, ".tbss_[UL]");
20359
20360 fputs ("\t.file\t", asm_out_file);
20361 output_quoted_string (asm_out_file, main_input_filename);
20362 fputc ('\n', asm_out_file);
20363 if (write_symbols != NO_DEBUG)
20364 switch_to_section (private_data_section);
20365 switch_to_section (toc_section);
20366 switch_to_section (text_section);
20367 if (profile_flag)
20368 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
20369 rs6000_file_start ();
20370 }
20371
20372 /* Output at end of assembler file.
20373 On the RS/6000, referencing data should automatically pull in text. */
20374
20375 static void
20376 rs6000_xcoff_file_end (void)
20377 {
20378 switch_to_section (text_section);
20379 fputs ("_section_.text:\n", asm_out_file);
20380 switch_to_section (data_section);
20381 fputs (TARGET_32BIT
20382 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
20383 asm_out_file);
20384 }
20385
20386 struct declare_alias_data
20387 {
20388 FILE *file;
20389 bool function_descriptor;
20390 };
20391
20392 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
20393
20394 static bool
20395 rs6000_declare_alias (struct symtab_node *n, void *d)
20396 {
20397 struct declare_alias_data *data = (struct declare_alias_data *)d;
20398 /* The main symbol is output specially, because the varasm machinery does
20399 part of the job for us; we do not need to emit .globl/.lglobl and such. */
20400 if (!n->alias || n->weakref)
20401 return false;
20402
20403 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
20404 return false;
20405
20406 /* Prevent assemble_alias from trying to use the .set pseudo-op,
20407 which does not behave as the middle-end expects. */
20408 TREE_ASM_WRITTEN (n->decl) = true;
20409
20410 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
20411 char *buffer = (char *) alloca (strlen (name) + 2);
20412 char *p;
20413 int dollar_inside = 0;
20414
20415 strcpy (buffer, name);
20416 p = strchr (buffer, '$');
20417 while (p) {
20418 *p = '_';
20419 dollar_inside++;
20420 p = strchr (p + 1, '$');
20421 }
20422 if (TREE_PUBLIC (n->decl))
20423 {
20424 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
20425 {
20426 if (dollar_inside) {
20427 if (data->function_descriptor)
20428 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20429 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20430 }
20431 if (data->function_descriptor)
20432 {
20433 fputs ("\t.globl .", data->file);
20434 RS6000_OUTPUT_BASENAME (data->file, buffer);
20435 putc ('\n', data->file);
20436 }
20437 fputs ("\t.globl ", data->file);
20438 RS6000_OUTPUT_BASENAME (data->file, buffer);
20439 putc ('\n', data->file);
20440 }
20441 #ifdef ASM_WEAKEN_DECL
20442 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
20443 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
20444 #endif
20445 }
20446 else
20447 {
20448 if (dollar_inside)
20449 {
20450 if (data->function_descriptor)
20451 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20452 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20453 }
20454 if (data->function_descriptor)
20455 {
20456 fputs ("\t.lglobl .", data->file);
20457 RS6000_OUTPUT_BASENAME (data->file, buffer);
20458 putc ('\n', data->file);
20459 }
20460 fputs ("\t.lglobl ", data->file);
20461 RS6000_OUTPUT_BASENAME (data->file, buffer);
20462 putc ('\n', data->file);
20463 }
20464 if (data->function_descriptor)
20465 fputs (".", data->file);
20466 RS6000_OUTPUT_BASENAME (data->file, buffer);
20467 fputs (":\n", data->file);
20468 return false;
20469 }
20470
20471
20472 #ifdef HAVE_GAS_HIDDEN
20473 /* Helper function to calculate visibility of a DECL
20474 and return the value as a const string. */
20475
20476 static const char *
20477 rs6000_xcoff_visibility (tree decl)
20478 {
20479 static const char * const visibility_types[] = {
20480 "", ",protected", ",hidden", ",internal"
20481 };
20482
20483 enum symbol_visibility vis = DECL_VISIBILITY (decl);
20484 return visibility_types[vis];
20485 }
20486 #endif
20487
20488
20489 /* This macro produces the initial definition of a function name.
20490 On the RS/6000, we need to place an extra '.' in the function name and
20491 output the function descriptor.
20492 Dollar signs are converted to underscores.
20493
20494 The csect for the function will have already been created when
20495 text_section was selected. We do have to go back to that csect, however.
20496
20497 The third and fourth parameters to the .function pseudo-op (16 and 044)
20498 are placeholders which no longer have any use.
20499
20500 Because the AIX assembler's .set command has unexpected semantics, we output
20501 all aliases as alternative labels in front of the definition. */
20502
20503 void
20504 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
20505 {
20506 char *buffer = (char *) alloca (strlen (name) + 1);
20507 char *p;
20508 int dollar_inside = 0;
20509 struct declare_alias_data data = {file, false};
20510
20511 strcpy (buffer, name);
20512 p = strchr (buffer, '$');
20513 while (p) {
20514 *p = '_';
20515 dollar_inside++;
20516 p = strchr (p + 1, '$');
20517 }
20518 if (TREE_PUBLIC (decl))
20519 {
20520 if (!RS6000_WEAK || !DECL_WEAK (decl))
20521 {
20522 if (dollar_inside) {
20523 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20524 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20525 }
20526 fputs ("\t.globl .", file);
20527 RS6000_OUTPUT_BASENAME (file, buffer);
20528 #ifdef HAVE_GAS_HIDDEN
20529 fputs (rs6000_xcoff_visibility (decl), file);
20530 #endif
20531 putc ('\n', file);
20532 }
20533 }
20534 else
20535 {
20536 if (dollar_inside) {
20537 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20538 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20539 }
20540 fputs ("\t.lglobl .", file);
20541 RS6000_OUTPUT_BASENAME (file, buffer);
20542 putc ('\n', file);
20543 }
20544 fputs ("\t.csect ", file);
20545 RS6000_OUTPUT_BASENAME (file, buffer);
20546 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
20547 RS6000_OUTPUT_BASENAME (file, buffer);
20548 fputs (":\n", file);
20549 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20550 &data, true);
20551 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
20552 RS6000_OUTPUT_BASENAME (file, buffer);
20553 fputs (", TOC[tc0], 0\n", file);
20554 in_section = NULL;
20555 switch_to_section (function_section (decl));
20556 putc ('.', file);
20557 RS6000_OUTPUT_BASENAME (file, buffer);
20558 fputs (":\n", file);
20559 data.function_descriptor = true;
20560 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20561 &data, true);
20562 if (!DECL_IGNORED_P (decl))
20563 {
20564 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
20565 xcoffout_declare_function (file, decl, buffer);
20566 else if (write_symbols == DWARF2_DEBUG)
20567 {
20568 name = (*targetm.strip_name_encoding) (name);
20569 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
20570 }
20571 }
20572 return;
20573 }
20574
20575
20576 /* Output assembly language to globalize a symbol from a DECL,
20577 possibly with visibility. */
20578
20579 void
20580 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
20581 {
20582 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
20583 fputs (GLOBAL_ASM_OP, stream);
20584 RS6000_OUTPUT_BASENAME (stream, name);
20585 #ifdef HAVE_GAS_HIDDEN
20586 fputs (rs6000_xcoff_visibility (decl), stream);
20587 #endif
20588 putc ('\n', stream);
20589 }
20590
20591 /* Output assembly language to define a symbol as COMMON from a DECL,
20592 possibly with visibility. */
20593
20594 void
20595 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
20596 tree decl ATTRIBUTE_UNUSED,
20597 const char *name,
20598 unsigned HOST_WIDE_INT size,
20599 unsigned HOST_WIDE_INT align)
20600 {
20601 unsigned HOST_WIDE_INT align2 = 2;
20602
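/* The .comm alignment operand is a log2 value: default to
   2**2 = 4 bytes, but use 2**3 = 8 for doubleword-sized data or
   anything with a stricter declared alignment.  */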
20603 if (align > 32)
20604 align2 = floor_log2 (align / BITS_PER_UNIT);
20605 else if (size > 4)
20606 align2 = 3;
20607
20608 fputs (COMMON_ASM_OP, stream);
20609 RS6000_OUTPUT_BASENAME (stream, name);
20610
20611 fprintf (stream,
20612 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
20613 size, align2);
20614
20615 #ifdef HAVE_GAS_HIDDEN
20616 if (decl != NULL)
20617 fputs (rs6000_xcoff_visibility (decl), stream);
20618 #endif
20619 putc ('\n', stream);
20620 }
20621
20622 /* This macro produces the initial definition of an object (variable) name.
20623 Because the AIX assembler's .set command has unexpected semantics, we output
20624 all aliases as alternative labels in front of the definition. */
20625
20626 void
20627 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
20628 {
20629 struct declare_alias_data data = {file, false};
20630 RS6000_OUTPUT_BASENAME (file, name);
20631 fputs (":\n", file);
20632 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20633 &data, true);
20634 }
20635
20636 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
20637
20638 void
20639 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
20640 {
20641 fputs (integer_asm_op (size, FALSE), file);
20642 assemble_name (file, label);
20643 fputs ("-$", file);
20644 }
20645
20646 /* Output a symbol offset relative to the dbase for the current object.
20647 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
20648 signed offsets.
20649
20650 __gcc_unwind_dbase is embedded in all executables/libraries through
20651 libgcc/config/rs6000/crtdbase.S. */
20652
20653 void
20654 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
20655 {
20656 fputs (integer_asm_op (size, FALSE), file);
20657 assemble_name (file, label);
20658 fputs("-__gcc_unwind_dbase", file);
20659 }
20660
20661 #ifdef HAVE_AS_TLS
20662 static void
20663 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
20664 {
20665 rtx symbol;
20666 int flags;
20667 const char *symname;
20668
20669 default_encode_section_info (decl, rtl, first);
20670
20671 /* Careful not to prod global register variables. */
20672 if (!MEM_P (rtl))
20673 return;
20674 symbol = XEXP (rtl, 0);
20675 if (!SYMBOL_REF_P (symbol))
20676 return;
20677
20678 flags = SYMBOL_REF_FLAGS (symbol);
20679
20680 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20681 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
20682
20683 SYMBOL_REF_FLAGS (symbol) = flags;
20684
20685 /* Append mapping class to extern decls. */
20686 symname = XSTR (symbol, 0);
20687 if (decl /* sync condition with assemble_external () */
20688 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
20689 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
20690 || TREE_CODE (decl) == FUNCTION_DECL)
20691 && symname[strlen (symname) - 1] != ']')
20692 {
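/* XCOFF storage-mapping classes: "[DS]" marks a function
   descriptor csect, "[UA]" data of unclassified alignment.  */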
20693 char *newname = (char *) alloca (strlen (symname) + 5);
20694 strcpy (newname, symname);
20695 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
20696 ? "[DS]" : "[UA]"));
20697 XSTR (symbol, 0) = ggc_strdup (newname);
20698 }
20699 }
20700 #endif /* HAVE_AS_TLS */
20701 #endif /* TARGET_XCOFF */
20702
20703 void
20704 rs6000_asm_weaken_decl (FILE *stream, tree decl,
20705 const char *name, const char *val)
20706 {
20707 fputs ("\t.weak\t", stream);
20708 RS6000_OUTPUT_BASENAME (stream, name);
20709 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20710 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20711 {
20712 if (TARGET_XCOFF)
20713 fputs ("[DS]", stream);
20714 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20715 if (TARGET_XCOFF)
20716 fputs (rs6000_xcoff_visibility (decl), stream);
20717 #endif
20718 fputs ("\n\t.weak\t.", stream);
20719 RS6000_OUTPUT_BASENAME (stream, name);
20720 }
20721 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20722 if (TARGET_XCOFF)
20723 fputs (rs6000_xcoff_visibility (decl), stream);
20724 #endif
20725 fputc ('\n', stream);
20726 if (val)
20727 {
20728 #ifdef ASM_OUTPUT_DEF
20729 ASM_OUTPUT_DEF (stream, name, val);
20730 #endif
20731 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20732 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20733 {
20734 fputs ("\t.set\t.", stream);
20735 RS6000_OUTPUT_BASENAME (stream, name);
20736 fputs (",.", stream);
20737 RS6000_OUTPUT_BASENAME (stream, val);
20738 fputc ('\n', stream);
20739 }
20740 }
20741 }
20742
20743
20744 /* Return true if INSN should not be copied. */
20745
20746 static bool
20747 rs6000_cannot_copy_insn_p (rtx_insn *insn)
20748 {
20749 return recog_memoized (insn) >= 0
20750 && get_attr_cannot_copy (insn);
20751 }
20752
20753 /* Compute a (partial) cost for rtx X. Return true if the complete
20754 cost has been computed, and false if subexpressions should be
20755 scanned. In either case, *TOTAL contains the cost result. */
20756
20757 static bool
20758 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
20759 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
20760 {
20761 int code = GET_CODE (x);
20762
20763 switch (code)
20764 {
20765 /* On the RS/6000, if it is valid in the insn, it is free. */
20766 case CONST_INT:
20767 if (((outer_code == SET
20768 || outer_code == PLUS
20769 || outer_code == MINUS)
20770 && (satisfies_constraint_I (x)
20771 || satisfies_constraint_L (x)))
20772 || (outer_code == AND
20773 && (satisfies_constraint_K (x)
20774 || (mode == SImode
20775 ? satisfies_constraint_L (x)
20776 : satisfies_constraint_J (x))))
20777 || ((outer_code == IOR || outer_code == XOR)
20778 && (satisfies_constraint_K (x)
20779 || (mode == SImode
20780 ? satisfies_constraint_L (x)
20781 : satisfies_constraint_J (x))))
20782 || outer_code == ASHIFT
20783 || outer_code == ASHIFTRT
20784 || outer_code == LSHIFTRT
20785 || outer_code == ROTATE
20786 || outer_code == ROTATERT
20787 || outer_code == ZERO_EXTRACT
20788 || (outer_code == MULT
20789 && satisfies_constraint_I (x))
20790 || ((outer_code == DIV || outer_code == UDIV
20791 || outer_code == MOD || outer_code == UMOD)
20792 && exact_log2 (INTVAL (x)) >= 0)
20793 || (outer_code == COMPARE
20794 && (satisfies_constraint_I (x)
20795 || satisfies_constraint_K (x)))
20796 || ((outer_code == EQ || outer_code == NE)
20797 && (satisfies_constraint_I (x)
20798 || satisfies_constraint_K (x)
20799 || (mode == SImode
20800 ? satisfies_constraint_L (x)
20801 : satisfies_constraint_J (x))))
20802 || (outer_code == GTU
20803 && satisfies_constraint_I (x))
20804 || (outer_code == LTU
20805 && satisfies_constraint_P (x)))
20806 {
20807 *total = 0;
20808 return true;
20809 }
20810 else if ((outer_code == PLUS
20811 && reg_or_add_cint_operand (x, VOIDmode))
20812 || (outer_code == MINUS
20813 && reg_or_sub_cint_operand (x, VOIDmode))
20814 || ((outer_code == SET
20815 || outer_code == IOR
20816 || outer_code == XOR)
20817 && (INTVAL (x)
20818 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
20819 {
20820 *total = COSTS_N_INSNS (1);
20821 return true;
20822 }
20823 /* FALLTHRU */
20824
20825 case CONST_DOUBLE:
20826 case CONST_WIDE_INT:
20827 case CONST:
20828 case HIGH:
20829 case SYMBOL_REF:
20830 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20831 return true;
20832
20833 case MEM:
20834 /* When optimizing for size, a MEM should be slightly more expensive
20835 than generating its address, e.g., (plus (reg) (const)).
20836 L1 cache latency is about two instructions. */
20837 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20838 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
20839 *total += COSTS_N_INSNS (100);
20840 return true;
20841
20842 case LABEL_REF:
20843 *total = 0;
20844 return true;
20845
20846 case PLUS:
20847 case MINUS:
20848 if (FLOAT_MODE_P (mode))
20849 *total = rs6000_cost->fp;
20850 else
20851 *total = COSTS_N_INSNS (1);
20852 return false;
20853
20854 case MULT:
20855 if (CONST_INT_P (XEXP (x, 1))
20856 && satisfies_constraint_I (XEXP (x, 1)))
20857 {
20858 if (INTVAL (XEXP (x, 1)) >= -256
20859 && INTVAL (XEXP (x, 1)) <= 255)
20860 *total = rs6000_cost->mulsi_const9;
20861 else
20862 *total = rs6000_cost->mulsi_const;
20863 }
20864 else if (mode == SFmode)
20865 *total = rs6000_cost->fp;
20866 else if (FLOAT_MODE_P (mode))
20867 *total = rs6000_cost->dmul;
20868 else if (mode == DImode)
20869 *total = rs6000_cost->muldi;
20870 else
20871 *total = rs6000_cost->mulsi;
20872 return false;
20873
20874 case FMA:
20875 if (mode == SFmode)
20876 *total = rs6000_cost->fp;
20877 else
20878 *total = rs6000_cost->dmul;
20879 break;
20880
20881 case DIV:
20882 case MOD:
20883 if (FLOAT_MODE_P (mode))
20884 {
20885 *total = mode == DFmode ? rs6000_cost->ddiv
20886 : rs6000_cost->sdiv;
20887 return false;
20888 }
20889 /* FALLTHRU */
20890
20891 case UDIV:
20892 case UMOD:
20893 if (CONST_INT_P (XEXP (x, 1))
20894 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
20895 {
20896 if (code == DIV || code == MOD)
20897 /* Shift, addze */
20898 *total = COSTS_N_INSNS (2);
20899 else
20900 /* Shift */
20901 *total = COSTS_N_INSNS (1);
20902 }
20903 else
20904 {
20905 if (GET_MODE (XEXP (x, 1)) == DImode)
20906 *total = rs6000_cost->divdi;
20907 else
20908 *total = rs6000_cost->divsi;
20909 }
20910 /* Add in shift and subtract for MOD unless we have a mod instruction. */
20911 if (!TARGET_MODULO && (code == MOD || code == UMOD))
20912 *total += COSTS_N_INSNS (2);
20913 return false;
20914
20915 case CTZ:
20916 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
20917 return false;
20918
20919 case FFS:
20920 *total = COSTS_N_INSNS (4);
20921 return false;
20922
20923 case POPCOUNT:
20924 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
20925 return false;
20926
20927 case PARITY:
20928 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
20929 return false;
20930
20931 case NOT:
20932 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
20933 *total = 0;
20934 else
20935 *total = COSTS_N_INSNS (1);
20936 return false;
20937
20938 case AND:
20939 if (CONST_INT_P (XEXP (x, 1)))
20940 {
20941 rtx left = XEXP (x, 0);
20942 rtx_code left_code = GET_CODE (left);
20943
20944 /* rotate-and-mask: 1 insn. */
20945 if ((left_code == ROTATE
20946 || left_code == ASHIFT
20947 || left_code == LSHIFTRT)
20948 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
20949 {
20950 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
20951 if (!CONST_INT_P (XEXP (left, 1)))
20952 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
20953 *total += COSTS_N_INSNS (1);
20954 return true;
20955 }
20956
20957 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
20958 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
20959 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
20960 || (val & 0xffff) == val
20961 || (val & 0xffff0000) == val
20962 || ((val & 0xffff) == 0 && mode == SImode))
20963 {
20964 *total = rtx_cost (left, mode, AND, 0, speed);
20965 *total += COSTS_N_INSNS (1);
20966 return true;
20967 }
20968
20969 /* 2 insns. */
20970 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
20971 {
20972 *total = rtx_cost (left, mode, AND, 0, speed);
20973 *total += COSTS_N_INSNS (2);
20974 return true;
20975 }
20976 }
20977
20978 *total = COSTS_N_INSNS (1);
20979 return false;
20980
20981 case IOR:
20982 /* FIXME */
20983 *total = COSTS_N_INSNS (1);
20984 return true;
20985
20986 case CLZ:
20987 case XOR:
20988 case ZERO_EXTRACT:
20989 *total = COSTS_N_INSNS (1);
20990 return false;
20991
20992 case ASHIFT:
20993 /* The EXTSWSLI instruction is a combined instruction. Don't count both
20994 the sign extend and shift separately within the insn. */
20995 if (TARGET_EXTSWSLI && mode == DImode
20996 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
20997 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
20998 {
20999 *total = 0;
21000 return false;
21001 }
21002 /* fall through */
21003
21004 case ASHIFTRT:
21005 case LSHIFTRT:
21006 case ROTATE:
21007 case ROTATERT:
21008 /* Handle mul_highpart. */
21009 if (outer_code == TRUNCATE
21010 && GET_CODE (XEXP (x, 0)) == MULT)
21011 {
21012 if (mode == DImode)
21013 *total = rs6000_cost->muldi;
21014 else
21015 *total = rs6000_cost->mulsi;
21016 return true;
21017 }
21018 else if (outer_code == AND)
21019 *total = 0;
21020 else
21021 *total = COSTS_N_INSNS (1);
21022 return false;
21023
21024 case SIGN_EXTEND:
21025 case ZERO_EXTEND:
21026 if (MEM_P (XEXP (x, 0)))
21027 *total = 0;
21028 else
21029 *total = COSTS_N_INSNS (1);
21030 return false;
21031
21032 case COMPARE:
21033 case NEG:
21034 case ABS:
21035 if (!FLOAT_MODE_P (mode))
21036 {
21037 *total = COSTS_N_INSNS (1);
21038 return false;
21039 }
21040 /* FALLTHRU */
21041
21042 case FLOAT:
21043 case UNSIGNED_FLOAT:
21044 case FIX:
21045 case UNSIGNED_FIX:
21046 case FLOAT_TRUNCATE:
21047 *total = rs6000_cost->fp;
21048 return false;
21049
21050 case FLOAT_EXTEND:
21051 if (mode == DFmode)
21052 *total = rs6000_cost->sfdf_convert;
21053 else
21054 *total = rs6000_cost->fp;
21055 return false;
21056
21057 case CALL:
21058 case IF_THEN_ELSE:
21059 if (!speed)
21060 {
21061 *total = COSTS_N_INSNS (1);
21062 return true;
21063 }
21064 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
21065 {
21066 *total = rs6000_cost->fp;
21067 return false;
21068 }
21069 break;
21070
21071 case NE:
21072 case EQ:
21073 case GTU:
21074 case LTU:
21075 /* Carry bit requires mode == Pmode.
21076 NEG or PLUS already counted so only add one. */
21077 if (mode == Pmode
21078 && (outer_code == NEG || outer_code == PLUS))
21079 {
21080 *total = COSTS_N_INSNS (1);
21081 return true;
21082 }
21083 /* FALLTHRU */
21084
21085 case GT:
21086 case LT:
21087 case UNORDERED:
21088 if (outer_code == SET)
21089 {
21090 if (XEXP (x, 1) == const0_rtx)
21091 {
21092 *total = COSTS_N_INSNS (2);
21093 return true;
21094 }
21095 else
21096 {
21097 *total = COSTS_N_INSNS (3);
21098 return false;
21099 }
21100 }
21101 /* CC COMPARE. */
21102 if (outer_code == COMPARE)
21103 {
21104 *total = 0;
21105 return true;
21106 }
21107 break;
21108
21109 default:
21110 break;
21111 }
21112
21113 return false;
21114 }
21115
21116 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
21117
21118 static bool
21119 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21120 int opno, int *total, bool speed)
21121 {
21122 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21123
21124 fprintf (stderr,
21125 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21126 "opno = %d, total = %d, speed = %s, x:\n",
21127 ret ? "complete" : "scan inner",
21128 GET_MODE_NAME (mode),
21129 GET_RTX_NAME (outer_code),
21130 opno,
21131 *total,
21132 speed ? "true" : "false");
21133
21134 debug_rtx (x);
21135
21136 return ret;
21137 }
21138
21139 static int
21140 rs6000_insn_cost (rtx_insn *insn, bool speed)
21141 {
21142 if (recog_memoized (insn) < 0)
21143 return 0;
21144
21145 /* If we are optimizing for size, just use the length. */
21146 if (!speed)
21147 return get_attr_length (insn);
21148
21149 /* Use the cost if provided. */
21150 int cost = get_attr_cost (insn);
21151 if (cost > 0)
21152 return cost;
21153
21154 /* If the insn tells us how many insns there are, use that. Otherwise use
21155 the length/4. Adjust the insn length to remove the extra size that
21156 prefixed instructions take. */
21157 int n = get_attr_num_insns (insn);
21158 if (n == 0)
21159 {
21160 int length = get_attr_length (insn);
21161 if (get_attr_prefixed (insn) == PREFIXED_YES)
21162 {
21163 int adjust = 0;
21164 ADJUST_INSN_LENGTH (insn, adjust);
21165 length -= adjust;
21166 }
21167
21168 n = length / 4;
21169 }
21170
21171 enum attr_type type = get_attr_type (insn);
21172
21173 switch (type)
21174 {
21175 case TYPE_LOAD:
21176 case TYPE_FPLOAD:
21177 case TYPE_VECLOAD:
21178 cost = COSTS_N_INSNS (n + 1);
21179 break;
21180
21181 case TYPE_MUL:
21182 switch (get_attr_size (insn))
21183 {
21184 case SIZE_8:
21185 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21186 break;
21187 case SIZE_16:
21188 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21189 break;
21190 case SIZE_32:
21191 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21192 break;
21193 case SIZE_64:
21194 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21195 break;
21196 default:
21197 gcc_unreachable ();
21198 }
21199 break;
21200 case TYPE_DIV:
21201 switch (get_attr_size (insn))
21202 {
21203 case SIZE_32:
21204 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21205 break;
21206 case SIZE_64:
21207 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21208 break;
21209 default:
21210 gcc_unreachable ();
21211 }
21212 break;
21213
21214 case TYPE_FP:
21215 cost = n * rs6000_cost->fp;
21216 break;
21217 case TYPE_DMUL:
21218 cost = n * rs6000_cost->dmul;
21219 break;
21220 case TYPE_SDIV:
21221 cost = n * rs6000_cost->sdiv;
21222 break;
21223 case TYPE_DDIV:
21224 cost = n * rs6000_cost->ddiv;
21225 break;
21226
21227 case TYPE_SYNC:
21228 case TYPE_LOAD_L:
21229 case TYPE_MFCR:
21230 case TYPE_MFCRF:
21231 cost = COSTS_N_INSNS (n + 2);
21232 break;
21233
21234 default:
21235 cost = COSTS_N_INSNS (n);
21236 }
21237
21238 return cost;
21239 }
21240
21241 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21242
21243 static int
21244 rs6000_debug_address_cost (rtx x, machine_mode mode,
21245 addr_space_t as, bool speed)
21246 {
21247 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21248
21249 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21250 ret, speed ? "true" : "false");
21251 debug_rtx (x);
21252
21253 return ret;
21254 }
21255
21256
21257 /* A C expression returning the cost of moving data from a register of class
21258 FROM to one of class TO. */
21259
21260 static int
21261 rs6000_register_move_cost (machine_mode mode,
21262 reg_class_t from, reg_class_t to)
21263 {
21264 int ret;
21265 reg_class_t rclass;
21266
21267 if (TARGET_DEBUG_COST)
21268 dbg_cost_ctrl++;
21269
21270 /* If we have VSX, we can easily move between FPR or Altivec registers,
21271 otherwise we can only easily move within classes.
21272 Do this first so we give best-case answers for union classes
21273 containing both gprs and vsx regs. */
21274 HARD_REG_SET to_vsx, from_vsx;
21275 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21276 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21277 if (!hard_reg_set_empty_p (to_vsx)
21278 && !hard_reg_set_empty_p (from_vsx)
21279 && (TARGET_VSX
21280 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21281 {
21282 int reg = FIRST_FPR_REGNO;
21283 if (TARGET_VSX
21284 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21285 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21286 reg = FIRST_ALTIVEC_REGNO;
21287 ret = 2 * hard_regno_nregs (reg, mode);
21288 }
21289
21290 /* Moves from/to GENERAL_REGS. */
21291 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21292 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21293 {
21294 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21295 {
21296 if (TARGET_DIRECT_MOVE)
21297 {
21298 /* Keep the cost for direct moves above that for within
21299 a register class even if the actual processor cost is
21300 comparable. We do this because a direct move insn
21301 can't be a nop, whereas with ideal register
21302 allocation a move within the same class might turn
21303 out to be a nop. */
21304 if (rs6000_tune == PROCESSOR_POWER9
21305 || rs6000_tune == PROCESSOR_FUTURE)
21306 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21307 else
21308 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21309 /* SFmode requires a conversion when moving between gprs
21310 and vsx. */
21311 if (mode == SFmode)
21312 ret += 2;
21313 }
21314 else
21315 ret = (rs6000_memory_move_cost (mode, rclass, false)
21316 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
21317 }
21318
21319 /* It's more expensive to move CR_REGS than CR0_REGS because of the
21320 shift. */
21321 else if (rclass == CR_REGS)
21322 ret = 4;
21323
21324 /* For those processors that have slow LR/CTR moves, make them more
21325 expensive than memory in order to bias spills to memory. */
21326 else if ((rs6000_tune == PROCESSOR_POWER6
21327 || rs6000_tune == PROCESSOR_POWER7
21328 || rs6000_tune == PROCESSOR_POWER8
21329 || rs6000_tune == PROCESSOR_POWER9)
21330 && reg_class_subset_p (rclass, SPECIAL_REGS))
21331 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21332
21333 else
21334 /* A move will cost one instruction per GPR moved. */
21335 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21336 }
21337
21338 /* Everything else has to go through GENERAL_REGS. */
21339 else
21340 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
21341 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
21342
21343 if (TARGET_DEBUG_COST)
21344 {
21345 if (dbg_cost_ctrl == 1)
21346 fprintf (stderr,
21347 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
21348 ret, GET_MODE_NAME (mode), reg_class_names[from],
21349 reg_class_names[to]);
21350 dbg_cost_ctrl--;
21351 }
21352
21353 return ret;
21354 }
21355
21356 /* A C expression returning the cost of moving data of MODE from a register to
21357 or from memory. */
21358
21359 static int
21360 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
21361 bool in ATTRIBUTE_UNUSED)
21362 {
21363 int ret;
21364
21365 if (TARGET_DEBUG_COST)
21366 dbg_cost_ctrl++;
21367
21368 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
21369 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21370 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
21371 || reg_classes_intersect_p (rclass, VSX_REGS)))
21372 ret = 4 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
21373 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
21374 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
21375 else
21376 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
21377
21378 if (TARGET_DEBUG_COST)
21379 {
21380 if (dbg_cost_ctrl == 1)
21381 fprintf (stderr,
21382 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
21383 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
21384 dbg_cost_ctrl--;
21385 }
21386
21387 return ret;
21388 }
21389
21390 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
21391
21392 The register allocator chooses GEN_OR_VSX_REGS for the allocno
21393 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
21394 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
21395 move cost between GENERAL_REGS and VSX_REGS low.
21396
21397 It might seem reasonable to use a union class. After all, if VSX usage
21398 is low and GPR usage high, it might make sense to spill GPRs to VSX
21399 registers rather than memory. However, in cases where register pressure of
21400 both is high, like the cactus_adm spec test, allowing
21401 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
21402 the first scheduling pass. This is partly due to an allocno of
21403 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
21404 class, which gives too high a pressure for GENERAL_REGS and too low
21405 for VSX_REGS. So, force a choice of the subclass here.
21406
21407 The best class is also the union if GENERAL_REGS and VSX_REGS have
21408 the same cost. In that case we do use GEN_OR_VSX_REGS as the
21409 allocno class, since trying to narrow down the class by regno mode
21410 is prone to error. For example, SImode is allowed in VSX regs and
21411 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
21412 it would be wrong to choose an allocno of GENERAL_REGS based on
21413 SImode. */
21414
21415 static reg_class_t
21416 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
21417 reg_class_t allocno_class,
21418 reg_class_t best_class)
21419 {
21420 switch (allocno_class)
21421 {
21422 case GEN_OR_VSX_REGS:
21423 /* best_class must be a subset of allocno_class. */
21424 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
21425 || best_class == GEN_OR_FLOAT_REGS
21426 || best_class == VSX_REGS
21427 || best_class == ALTIVEC_REGS
21428 || best_class == FLOAT_REGS
21429 || best_class == GENERAL_REGS
21430 || best_class == BASE_REGS);
21431 /* Use best_class but choose wider classes when copying from the
21432 wider class to best_class is cheap. This mimics IRA choice
21433 of allocno class. */
21434 if (best_class == BASE_REGS)
21435 return GENERAL_REGS;
21436 if (TARGET_VSX
21437 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
21438 return VSX_REGS;
21439 return best_class;
21440
21441 default:
21442 break;
21443 }
21444
21445 return allocno_class;
21446 }
21447
21448 /* Returns a code for a target-specific builtin that implements
21449 reciprocal of the function, or NULL_TREE if not available. */
21450
21451 static tree
21452 rs6000_builtin_reciprocal (tree fndecl)
21453 {
21454 switch (DECL_MD_FUNCTION_CODE (fndecl))
21455 {
21456 case VSX_BUILTIN_XVSQRTDP:
21457 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
21458 return NULL_TREE;
21459
21460 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
21461
21462 case VSX_BUILTIN_XVSQRTSP:
21463 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
21464 return NULL_TREE;
21465
21466 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
21467
21468 default:
21469 return NULL_TREE;
21470 }
21471 }
21472
21473 /* Load up a constant. If the mode is a vector mode, splat the value across
21474 all of the vector elements. */
21475
21476 static rtx
21477 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
21478 {
21479 rtx reg;
21480
21481 if (mode == SFmode || mode == DFmode)
21482 {
21483 rtx d = const_double_from_real_value (dconst, mode);
21484 reg = force_reg (mode, d);
21485 }
21486 else if (mode == V4SFmode)
21487 {
21488 rtx d = const_double_from_real_value (dconst, SFmode);
21489 rtvec v = gen_rtvec (4, d, d, d, d);
21490 reg = gen_reg_rtx (mode);
21491 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21492 }
21493 else if (mode == V2DFmode)
21494 {
21495 rtx d = const_double_from_real_value (dconst, DFmode);
21496 rtvec v = gen_rtvec (2, d, d);
21497 reg = gen_reg_rtx (mode);
21498 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21499 }
21500 else
21501 gcc_unreachable ();
21502
21503 return reg;
21504 }
21505
21506 /* Generate an FMA instruction. */
21507
21508 static void
21509 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
21510 {
21511 machine_mode mode = GET_MODE (target);
21512 rtx dst;
21513
21514 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
21515 gcc_assert (dst != NULL);
21516
21517 if (dst != target)
21518 emit_move_insn (target, dst);
21519 }
21520
21521 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
21522
21523 static void
21524 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
21525 {
21526 machine_mode mode = GET_MODE (dst);
21527 rtx r;
21528
21529 /* This is a tad more complicated, since the fnma_optab is for
21530 a different expression: fma(-m1, m2, a), which is the same
21531 thing except in the case of signed zeros.
21532
21533 Fortunately we know that if FMA is supported that FNMSUB is
21534 also supported in the ISA. Just expand it directly. */
21535
21536 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
21537
21538 r = gen_rtx_NEG (mode, a);
21539 r = gen_rtx_FMA (mode, m1, m2, r);
21540 r = gen_rtx_NEG (mode, r);
21541 emit_insn (gen_rtx_SET (dst, r));
21542 }
21543
21544 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
21545 add a reg_note saying that this was a division. Support both scalar and
21546 vector divide. Assumes no trapping math and finite arguments. */
21547
21548 void
21549 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
21550 {
21551 machine_mode mode = GET_MODE (dst);
21552 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
21553 int i;
21554
21555 /* Low precision estimates guarantee 5 bits of accuracy. High
21556 precision estimates guarantee 14 bits of accuracy. SFmode
21557 requires 23 bits of accuracy. DFmode requires 52 bits of
21558 accuracy. Each pass at least doubles the accuracy, leading
21559 to the following. */
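/* Concretely: a 14-bit estimate reaches 28 >= 23 bits for SFmode in
   one pass and 56 >= 52 bits for DFmode in two; a 5-bit estimate
   needs three passes for SFmode (5 -> 10 -> 20 -> 40) and four for
   DFmode (-> 80).  */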
21560 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21561 if (mode == DFmode || mode == V2DFmode)
21562 passes++;
21563
21564 enum insn_code code = optab_handler (smul_optab, mode);
21565 insn_gen_fn gen_mul = GEN_FCN (code);
21566
21567 gcc_assert (code != CODE_FOR_nothing);
21568
21569 one = rs6000_load_constant_and_splat (mode, dconst1);
21570
21571 /* x0 = 1./d estimate */
21572 x0 = gen_reg_rtx (mode);
21573 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
21574 UNSPEC_FRES)));
21575
21576 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
21577 if (passes > 1) {
21578
21579 /* e0 = 1. - d * x0 */
21580 e0 = gen_reg_rtx (mode);
21581 rs6000_emit_nmsub (e0, d, x0, one);
21582
21583 /* x1 = x0 + e0 * x0 */
21584 x1 = gen_reg_rtx (mode);
21585 rs6000_emit_madd (x1, e0, x0, x0);
21586
21587 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
21588 ++i, xprev = xnext, eprev = enext) {
21589
21590 /* enext = eprev * eprev */
21591 enext = gen_reg_rtx (mode);
21592 emit_insn (gen_mul (enext, eprev, eprev));
21593
21594 /* xnext = xprev + enext * xprev */
21595 xnext = gen_reg_rtx (mode);
21596 rs6000_emit_madd (xnext, enext, xprev, xprev);
21597 }
21598
21599 } else
21600 xprev = x0;
21601
21602 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
21603
21604 /* u = n * xprev */
21605 u = gen_reg_rtx (mode);
21606 emit_insn (gen_mul (u, n, xprev));
21607
21608 /* v = n - (d * u) */
21609 v = gen_reg_rtx (mode);
21610 rs6000_emit_nmsub (v, d, u, n);
21611
21612 /* dst = (v * xprev) + u */
21613 rs6000_emit_madd (dst, v, xprev, u);
21614
21615 if (note_p)
21616 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
21617 }
21618
21619 /* Goldschmidt's Algorithm for single/double-precision floating point
21620 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
21621
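/* Starting from an estimate e ~= 1/sqrt(src), the code forms
   g = e*src ~= sqrt(src) and h = e/2 ~= 1/(2*sqrt(src)); each pass
   computes t = 1/2 - g*h and refines g += g*t and h += h*t, roughly
   doubling the number of correct bits per pass.  */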
21622 void
21623 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
21624 {
21625 machine_mode mode = GET_MODE (src);
21626 rtx e = gen_reg_rtx (mode);
21627 rtx g = gen_reg_rtx (mode);
21628 rtx h = gen_reg_rtx (mode);
21629
21630 /* Low precision estimates guarantee 5 bits of accuracy. High
21631 precision estimates guarantee 14 bits of accuracy. SFmode
21632 requires 23 bits of accuracy. DFmode requires 52 bits of
21633 accuracy. Each pass at least doubles the accuracy, leading
21634 to the following. */
21635 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21636 if (mode == DFmode || mode == V2DFmode)
21637 passes++;
21638
21639 int i;
21640 rtx mhalf;
21641 enum insn_code code = optab_handler (smul_optab, mode);
21642 insn_gen_fn gen_mul = GEN_FCN (code);
21643
21644 gcc_assert (code != CODE_FOR_nothing);
21645
21646 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
21647
21648 /* e = rsqrt estimate */
21649 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
21650 UNSPEC_RSQRT)));
21651
21652 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
21653 if (!recip)
21654 {
21655 rtx zero = force_reg (mode, CONST0_RTX (mode));
21656
21657 if (mode == SFmode)
21658 {
21659 rtx target = emit_conditional_move (e, GT, src, zero, mode,
21660 e, zero, mode, 0);
21661 if (target != e)
21662 emit_move_insn (e, target);
21663 }
21664 else
21665 {
21666 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
21667 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
21668 }
21669 }
21670
21671 /* g = sqrt estimate. */
21672 emit_insn (gen_mul (g, e, src));
21673 /* h = 1/(2*sqrt) estimate. */
21674 emit_insn (gen_mul (h, e, mhalf));
21675
21676 if (recip)
21677 {
21678 if (passes == 1)
21679 {
21680 rtx t = gen_reg_rtx (mode);
21681 rs6000_emit_nmsub (t, g, h, mhalf);
21682 /* Apply the correction directly to the rsqrt estimate. */
21683 rs6000_emit_madd (dst, e, t, e);
21684 }
21685 else
21686 {
21687 for (i = 0; i < passes; i++)
21688 {
21689 rtx t1 = gen_reg_rtx (mode);
21690 rtx g1 = gen_reg_rtx (mode);
21691 rtx h1 = gen_reg_rtx (mode);
21692
21693 rs6000_emit_nmsub (t1, g, h, mhalf);
21694 rs6000_emit_madd (g1, g, t1, g);
21695 rs6000_emit_madd (h1, h, t1, h);
21696
21697 g = g1;
21698 h = h1;
21699 }
21700 /* Multiply by 2 to get the rsqrt result. */
21701 emit_insn (gen_add3_insn (dst, h, h));
21702 }
21703 }
21704 else
21705 {
21706 rtx t = gen_reg_rtx (mode);
21707 rs6000_emit_nmsub (t, g, h, mhalf);
21708 rs6000_emit_madd (dst, g, t, g);
21709 }
21710
21711 return;
21712 }
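/* A sketch of the Goldschmidt coupling used above (an illustrative
   derivation, not from the original comments): the loop maintains
   g ~= sqrt(x) and h ~= 1/(2*sqrt(x)), so that g*h ~= 1/2.  Each pass
   computes

     t  = 1/2 - g*h      (rs6000_emit_nmsub with mhalf)
     g' = g + g*t        (rs6000_emit_madd)
     h' = h + h*t

   Writing g*h = (1/2)*(1 + e) gives t = -e/2 and

     g'*h' = g*h * (1 + t)^2 = (1/2) * (1 - (3/4)*e^2 + (1/4)*e^3),

   so the error is roughly squared on every pass.  On exit g has
   converged to sqrt(x) and 2*h to 1/sqrt(x), which is why the
   reciprocal case finishes with the gen_add3_insn above.  */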
21713
21714 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
21715 (Power7) targets. DST is the target, and SRC is the argument operand. */
21716
21717 void
21718 rs6000_emit_popcount (rtx dst, rtx src)
21719 {
21720 machine_mode mode = GET_MODE (dst);
21721 rtx tmp1, tmp2;
21722
21723 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
21724 if (TARGET_POPCNTD)
21725 {
21726 if (mode == SImode)
21727 emit_insn (gen_popcntdsi2 (dst, src));
21728 else
21729 emit_insn (gen_popcntddi2 (dst, src));
21730 return;
21731 }
21732
21733 tmp1 = gen_reg_rtx (mode);
21734
21735 if (mode == SImode)
21736 {
21737 emit_insn (gen_popcntbsi2 (tmp1, src));
21738 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
21739 NULL_RTX, 0);
21740 tmp2 = force_reg (SImode, tmp2);
21741 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
21742 }
21743 else
21744 {
21745 emit_insn (gen_popcntbdi2 (tmp1, src));
21746 tmp2 = expand_mult (DImode, tmp1,
21747 GEN_INT ((HOST_WIDE_INT)
21748 0x01010101 << 32 | 0x01010101),
21749 NULL_RTX, 0);
21750 tmp2 = force_reg (DImode, tmp2);
21751 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
21752 }
21753 }
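/* A portable C sketch of the multiply-and-shift trick used above
   (illustrative only; this helper is hypothetical and not part of the
   original file).  Each byte of BYTECOUNTS holds the popcount of the
   corresponding source byte, which is what popcntb produces;
   multiplying by 0x01010101 sums all four byte counts into the most
   significant byte, and the shift extracts that byte.  For example,
   byte counts 0x04040404 * 0x01010101 = 0x100c0804, and
   0x100c0804 >> 24 = 16.  */

static unsigned int ATTRIBUTE_UNUSED
popcount32_from_bytecounts (unsigned int bytecounts)
{
  /* No byte sum can exceed 8 * 4 = 32, so the per-byte additions
     performed by the multiply cannot carry between bytes.  */
  return (bytecounts * 0x01010101u) >> 24;
}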
21754
21755
21756 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
21757 target, and SRC is the argument operand. */
21758
21759 void
21760 rs6000_emit_parity (rtx dst, rtx src)
21761 {
21762 machine_mode mode = GET_MODE (dst);
21763 rtx tmp;
21764
21765 tmp = gen_reg_rtx (mode);
21766
21767 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
21768 if (TARGET_CMPB)
21769 {
21770 if (mode == SImode)
21771 {
21772 emit_insn (gen_popcntbsi2 (tmp, src));
21773 emit_insn (gen_paritysi2_cmpb (dst, tmp));
21774 }
21775 else
21776 {
21777 emit_insn (gen_popcntbdi2 (tmp, src));
21778 emit_insn (gen_paritydi2_cmpb (dst, tmp));
21779 }
21780 return;
21781 }
21782
21783 if (mode == SImode)
21784 {
21785 /* Is mult+shift >= shift+xor+shift+xor? */
21786 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
21787 {
21788 rtx tmp1, tmp2, tmp3, tmp4;
21789
21790 tmp1 = gen_reg_rtx (SImode);
21791 emit_insn (gen_popcntbsi2 (tmp1, src));
21792
21793 tmp2 = gen_reg_rtx (SImode);
21794 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
21795 tmp3 = gen_reg_rtx (SImode);
21796 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
21797
21798 tmp4 = gen_reg_rtx (SImode);
21799 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
21800 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
21801 }
21802 else
21803 rs6000_emit_popcount (tmp, src);
21804 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
21805 }
21806 else
21807 {
21808 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
21809 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
21810 {
21811 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
21812
21813 tmp1 = gen_reg_rtx (DImode);
21814 emit_insn (gen_popcntbdi2 (tmp1, src));
21815
21816 tmp2 = gen_reg_rtx (DImode);
21817 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
21818 tmp3 = gen_reg_rtx (DImode);
21819 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
21820
21821 tmp4 = gen_reg_rtx (DImode);
21822 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
21823 tmp5 = gen_reg_rtx (DImode);
21824 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
21825
21826 tmp6 = gen_reg_rtx (DImode);
21827 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
21828 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
21829 }
21830 else
21831 rs6000_emit_popcount (tmp, src);
21832 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
21833 }
21834 }
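/* A portable C sketch of the shift/xor reduction used above
   (illustrative only; this helper is hypothetical).  XORing a word
   with its upper half preserves the parity of the whole word while
   halving the width that still matters, so two folds reduce the four
   popcntb byte counts to one byte whose low bit is the overall
   parity.  */

static unsigned int ATTRIBUTE_UNUSED
parity32_by_folding (unsigned int bytecounts)
{
  bytecounts ^= bytecounts >> 16;	/* Fold halfwords together.  */
  bytecounts ^= bytecounts >> 8;	/* Fold bytes together.  */
  return bytecounts & 1;		/* Sum of bit counts mod 2.  */
}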
21835
21836 /* Expand an Altivec constant permutation for little-endian mode.
21837 OP0 and OP1 are the input vectors and TARGET is the output vector.
21838 SEL specifies the constant permutation vector.
21839
21840 There are two issues: First, the two input operands must be
21841 swapped so that together they form a double-wide array in LE
21842 order. Second, the vperm instruction has surprising behavior
21843 in LE mode: it interprets the elements of the source vectors
21844 in BE mode ("left to right") and interprets the elements of
21845 the destination vector in LE mode ("right to left"). To
21846 correct for this, we must subtract each element of the permute
21847 control vector from 31.
21848
21849 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
21850 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
21851 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
21852 serve as the permute control vector. Then, in BE mode,
21853
21854 vperm 9,10,11,12
21855
21856 places the desired result in vr9. However, in LE mode the
21857 vector contents will be
21858
21859 vr10 = 00000003 00000002 00000001 00000000
21860 vr11 = 00000007 00000006 00000005 00000004
21861
21862 The result of the vperm using the same permute control vector is
21863
21864 vr9 = 05000000 07000000 01000000 03000000
21865
21866 That is, the leftmost 4 bytes of vr10 are interpreted as the
21867 source for the rightmost 4 bytes of vr9, and so on.
21868
21869 If we change the permute control vector to
21870
21871 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
21872
21873 and issue
21874
21875 vperm 9,11,10,12
21876
21877 we get the desired
21878
21879 vr9 = 00000006 00000004 00000002 00000000. */
21880
21881 static void
21882 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
21883 const vec_perm_indices &sel)
21884 {
21885 unsigned int i;
21886 rtx perm[16];
21887 rtx constv, unspec;
21888
21889 /* Unpack and adjust the constant selector. */
21890 for (i = 0; i < 16; ++i)
21891 {
21892 unsigned int elt = 31 - (sel[i] & 31);
21893 perm[i] = GEN_INT (elt);
21894 }
21895
21896 /* Expand to a permute, swapping the inputs and using the
21897 adjusted selector. */
21898 if (!REG_P (op0))
21899 op0 = force_reg (V16QImode, op0);
21900 if (!REG_P (op1))
21901 op1 = force_reg (V16QImode, op1);
21902
21903 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
21904 constv = force_reg (V16QImode, constv);
21905 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
21906 UNSPEC_VPERM);
21907 if (!REG_P (target))
21908 {
21909 rtx tmp = gen_reg_rtx (V16QImode);
21910 emit_move_insn (tmp, unspec);
21911 unspec = tmp;
21912 }
21913
21914 emit_move_insn (target, unspec);
21915 }
21916
21917 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
21918 permute control vector. But here it's not a constant, so we must
21919 generate a vector NAND or NOR to do the adjustment. */
21920
21921 void
21922 altivec_expand_vec_perm_le (rtx operands[4])
21923 {
21924 rtx notx, iorx, unspec;
21925 rtx target = operands[0];
21926 rtx op0 = operands[1];
21927 rtx op1 = operands[2];
21928 rtx sel = operands[3];
21929 rtx tmp = target;
21930 rtx norreg = gen_reg_rtx (V16QImode);
21931 machine_mode mode = GET_MODE (target);
21932
21933 /* Get everything in regs so the pattern matches. */
21934 if (!REG_P (op0))
21935 op0 = force_reg (mode, op0);
21936 if (!REG_P (op1))
21937 op1 = force_reg (mode, op1);
21938 if (!REG_P (sel))
21939 sel = force_reg (V16QImode, sel);
21940 if (!REG_P (target))
21941 tmp = gen_reg_rtx (mode);
21942
21943 if (TARGET_P9_VECTOR)
21944 {
21945 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
21946 UNSPEC_VPERMR);
21947 }
21948 else
21949 {
21950 /* Invert the selector with a VNAND if available, else a VNOR.
21951 The VNAND is preferred for future fusion opportunities. */
21952 notx = gen_rtx_NOT (V16QImode, sel);
21953 iorx = (TARGET_P8_VECTOR
21954 ? gen_rtx_IOR (V16QImode, notx, notx)
21955 : gen_rtx_AND (V16QImode, notx, notx));
21956 emit_insn (gen_rtx_SET (norreg, iorx));
21957
21958 /* Permute with operands reversed and adjusted selector. */
21959 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
21960 UNSPEC_VPERM);
21961 }
21962
21963 /* Copy into target, possibly by way of a register. */
21964 if (!REG_P (target))
21965 {
21966 emit_move_insn (tmp, unspec);
21967 unspec = tmp;
21968 }
21969
21970 emit_move_insn (target, unspec);
21971 }
21972
21973 /* Expand an Altivec constant permutation. Return true if we match
21974 an efficient implementation; false to fall back to VPERM.
21975
21976 OP0 and OP1 are the input vectors and TARGET is the output vector.
21977 SEL specifies the constant permutation vector. */
21978
21979 static bool
21980 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
21981 const vec_perm_indices &sel)
21982 {
21983 struct altivec_perm_insn {
21984 HOST_WIDE_INT mask;
21985 enum insn_code impl;
21986 unsigned char perm[16];
21987 };
21988 static const struct altivec_perm_insn patterns[] = {
21989 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
21990 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
21991 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
21992 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
21993 { OPTION_MASK_ALTIVEC,
21994 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
21995 : CODE_FOR_altivec_vmrglb_direct),
21996 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
21997 { OPTION_MASK_ALTIVEC,
21998 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
21999 : CODE_FOR_altivec_vmrglh_direct),
22000 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
22001 { OPTION_MASK_ALTIVEC,
22002 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
22003 : CODE_FOR_altivec_vmrglw_direct),
22004 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
22005 { OPTION_MASK_ALTIVEC,
22006 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
22007 : CODE_FOR_altivec_vmrghb_direct),
22008 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
22009 { OPTION_MASK_ALTIVEC,
22010 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
22011 : CODE_FOR_altivec_vmrghh_direct),
22012 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
22013 { OPTION_MASK_ALTIVEC,
22014 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
22015 : CODE_FOR_altivec_vmrghw_direct),
22016 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
22017 { OPTION_MASK_P8_VECTOR,
22018 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
22019 : CODE_FOR_p8_vmrgow_v4sf_direct),
22020 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
22021 { OPTION_MASK_P8_VECTOR,
22022 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
22023 : CODE_FOR_p8_vmrgew_v4sf_direct),
22024 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
22025 };
22026
22027 unsigned int i, j, elt, which;
22028 unsigned char perm[16];
22029 rtx x;
22030 bool one_vec;
22031
22032 /* Unpack the constant selector. */
22033 for (i = which = 0; i < 16; ++i)
22034 {
22035 elt = sel[i] & 31;
22036 which |= (elt < 16 ? 1 : 2);
22037 perm[i] = elt;
22038 }
22039
22040 /* Simplify the constant selector based on operands. */
22041 switch (which)
22042 {
22043 default:
22044 gcc_unreachable ();
22045
22046 case 3:
22047 one_vec = false;
22048 if (!rtx_equal_p (op0, op1))
22049 break;
22050 /* FALLTHRU */
22051
22052 case 2:
22053 for (i = 0; i < 16; ++i)
22054 perm[i] &= 15;
22055 op0 = op1;
22056 one_vec = true;
22057 break;
22058
22059 case 1:
22060 op1 = op0;
22061 one_vec = true;
22062 break;
22063 }
22064
22065 /* Look for splat patterns. */
22066 if (one_vec)
22067 {
22068 elt = perm[0];
22069
22070 for (i = 0; i < 16; ++i)
22071 if (perm[i] != elt)
22072 break;
22073 if (i == 16)
22074 {
22075 if (!BYTES_BIG_ENDIAN)
22076 elt = 15 - elt;
22077 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
22078 return true;
22079 }
22080
22081 if (elt % 2 == 0)
22082 {
22083 for (i = 0; i < 16; i += 2)
22084 if (perm[i] != elt || perm[i + 1] != elt + 1)
22085 break;
22086 if (i == 16)
22087 {
22088 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
22089 x = gen_reg_rtx (V8HImode);
22090 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
22091 GEN_INT (field)));
22092 emit_move_insn (target, gen_lowpart (V16QImode, x));
22093 return true;
22094 }
22095 }
22096
22097 if (elt % 4 == 0)
22098 {
22099 for (i = 0; i < 16; i += 4)
22100 if (perm[i] != elt
22101 || perm[i + 1] != elt + 1
22102 || perm[i + 2] != elt + 2
22103 || perm[i + 3] != elt + 3)
22104 break;
22105 if (i == 16)
22106 {
22107 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
22108 x = gen_reg_rtx (V4SImode);
22109 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
22110 GEN_INT (field)));
22111 emit_move_insn (target, gen_lowpart (V16QImode, x));
22112 return true;
22113 }
22114 }
22115 }
22116
22117 /* Look for merge and pack patterns. */
22118 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
22119 {
22120 bool swapped;
22121
22122 if ((patterns[j].mask & rs6000_isa_flags) == 0)
22123 continue;
22124
22125 elt = patterns[j].perm[0];
22126 if (perm[0] == elt)
22127 swapped = false;
22128 else if (perm[0] == elt + 16)
22129 swapped = true;
22130 else
22131 continue;
22132 for (i = 1; i < 16; ++i)
22133 {
22134 elt = patterns[j].perm[i];
22135 if (swapped)
22136 elt = (elt >= 16 ? elt - 16 : elt + 16);
22137 else if (one_vec && elt >= 16)
22138 elt -= 16;
22139 if (perm[i] != elt)
22140 break;
22141 }
22142 if (i == 16)
22143 {
22144 enum insn_code icode = patterns[j].impl;
22145 machine_mode omode = insn_data[icode].operand[0].mode;
22146 machine_mode imode = insn_data[icode].operand[1].mode;
22147
22148 /* For little-endian, don't use vpkuwum and vpkuhum if the
22149 underlying vector type is not V4SI and V8HI, respectively.
22150 For example, using vpkuwum with a V8HI picks up the even
22151 halfwords (BE numbering) when the even halfwords (LE
22152 numbering) are what we need. */
22153 if (!BYTES_BIG_ENDIAN
22154 && icode == CODE_FOR_altivec_vpkuwum_direct
22155 && ((REG_P (op0)
22156 && GET_MODE (op0) != V4SImode)
22157 || (SUBREG_P (op0)
22158 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
22159 continue;
22160 if (!BYTES_BIG_ENDIAN
22161 && icode == CODE_FOR_altivec_vpkuhum_direct
22162 && ((REG_P (op0)
22163 && GET_MODE (op0) != V8HImode)
22164 || (SUBREG_P (op0)
22165 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
22166 continue;
22167
22168 /* For little-endian, the two input operands must be swapped
22169 (or swapped back) to ensure proper right-to-left numbering
22170 from 0 to 2N-1. */
22171 if (swapped ^ !BYTES_BIG_ENDIAN)
22172 std::swap (op0, op1);
22173 if (imode != V16QImode)
22174 {
22175 op0 = gen_lowpart (imode, op0);
22176 op1 = gen_lowpart (imode, op1);
22177 }
22178 if (omode == V16QImode)
22179 x = target;
22180 else
22181 x = gen_reg_rtx (omode);
22182 emit_insn (GEN_FCN (icode) (x, op0, op1));
22183 if (omode != V16QImode)
22184 emit_move_insn (target, gen_lowpart (V16QImode, x));
22185 return true;
22186 }
22187 }
22188
22189 if (!BYTES_BIG_ENDIAN)
22190 {
22191 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
22192 return true;
22193 }
22194
22195 return false;
22196 }
22197
22198 /* Expand a VSX Permute Doubleword constant permutation.
22199 Return true if we match an efficient implementation. */
22200
22201 static bool
22202 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
22203 unsigned char perm0, unsigned char perm1)
22204 {
22205 rtx x;
22206
22207 /* If both selectors come from the same operand, fold to single op. */
22208 if ((perm0 & 2) == (perm1 & 2))
22209 {
22210 if (perm0 & 2)
22211 op0 = op1;
22212 else
22213 op1 = op0;
22214 }
22215 /* If both operands are equal, fold to simpler permutation. */
22216 if (rtx_equal_p (op0, op1))
22217 {
22218 perm0 = perm0 & 1;
22219 perm1 = (perm1 & 1) + 2;
22220 }
22221 /* If the first selector comes from the second operand, swap. */
22222 else if (perm0 & 2)
22223 {
22224 if (perm1 & 2)
22225 return false;
22226 perm0 -= 2;
22227 perm1 += 2;
22228 std::swap (op0, op1);
22229 }
22230 /* If the second selector does not come from the second operand, fail. */
22231 else if ((perm1 & 2) == 0)
22232 return false;
22233
22234 /* Success! */
22235 if (target != NULL)
22236 {
22237 machine_mode vmode, dmode;
22238 rtvec v;
22239
22240 vmode = GET_MODE (target);
22241 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
22242 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
22243 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
22244 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
22245 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
22246 emit_insn (gen_rtx_SET (target, x));
22247 }
22248 return true;
22249 }
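/* An illustrative note on the selector encoding handled above (a
   sketch, not from the original comments): PERM0 and PERM1 each index
   the doubleword concatenation {op0[0], op0[1], op1[0], op1[1]}, so
   bit 1 of a selector names the operand and bit 0 the half.  For
   example, perm0 = 3, perm1 = 1 starts with the first selector reading
   the second operand; the swap case above rewrites it to perm0 = 1,
   perm1 = 3 with op0 and op1 exchanged, the form the underlying
   permute expects.  */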
22250
22251 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22252
22253 static bool
22254 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22255 rtx op1, const vec_perm_indices &sel)
22256 {
22257 bool testing_p = !target;
22258
22259 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22260 if (TARGET_ALTIVEC && testing_p)
22261 return true;
22262
22263 /* Check for ps_merge* or xxpermdi insns. */
22264 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22265 {
22266 if (testing_p)
22267 {
22268 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22269 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22270 }
22271 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22272 return true;
22273 }
22274
22275 if (TARGET_ALTIVEC)
22276 {
22277 /* Force the target-independent code to lower to V16QImode. */
22278 if (vmode != V16QImode)
22279 return false;
22280 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22281 return true;
22282 }
22283
22284 return false;
22285 }
22286
22287 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22288 OP0 and OP1 are the input vectors and TARGET is the output vector.
22289 PERM specifies the constant permutation vector. */
22290
22291 static void
22292 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22293 machine_mode vmode, const vec_perm_builder &perm)
22294 {
22295 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22296 if (x != target)
22297 emit_move_insn (target, x);
22298 }
22299
22300 /* Expand an extract even operation. */
22301
22302 void
22303 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22304 {
22305 machine_mode vmode = GET_MODE (target);
22306 unsigned i, nelt = GET_MODE_NUNITS (vmode);
22307 vec_perm_builder perm (nelt, nelt, 1);
22308
22309 for (i = 0; i < nelt; i++)
22310 perm.quick_push (i * 2);
22311
22312 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22313 }
22314
22315 /* Expand a vector interleave operation. */
22316
22317 void
22318 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
22319 {
22320 machine_mode vmode = GET_MODE (target);
22321 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
22322 vec_perm_builder perm (nelt, nelt, 1);
22323
22324 high = (highp ? 0 : nelt / 2);
22325 for (i = 0; i < nelt / 2; i++)
22326 {
22327 perm.quick_push (i + high);
22328 perm.quick_push (i + nelt + high);
22329 }
22330
22331 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22332 }
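/* A worked example of the selector built above: for a 4-element
   vector, highp = true gives high = 0 and pushes {0, 4, 1, 5},
   interleaving the first halves of the two inputs, while
   highp = false gives high = 2 and pushes {2, 6, 3, 7} for the
   second halves.  */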
22333
22334 /* Scale a V2DF vector SRC by two raised to the power SCALE, placing the result in TGT. */
22335 void
22336 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
22337 {
22338 HOST_WIDE_INT hwi_scale (scale);
22339 REAL_VALUE_TYPE r_pow;
22340 rtvec v = rtvec_alloc (2);
22341 rtx elt;
22342 rtx scale_vec = gen_reg_rtx (V2DFmode);
22343 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
22344 elt = const_double_from_real_value (r_pow, DFmode);
22345 RTVEC_ELT (v, 0) = elt;
22346 RTVEC_ELT (v, 1) = elt;
22347 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
22348 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
22349 }
22350
22351 /* Return an RTX representing where to find the function value of a
22352 function returning MODE. */
22353 static rtx
22354 rs6000_complex_function_value (machine_mode mode)
22355 {
22356 unsigned int regno;
22357 rtx r1, r2;
22358 machine_mode inner = GET_MODE_INNER (mode);
22359 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
22360
22361 if (TARGET_FLOAT128_TYPE
22362 && (mode == KCmode
22363 || (mode == TCmode && TARGET_IEEEQUAD)))
22364 regno = ALTIVEC_ARG_RETURN;
22365
22366 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22367 regno = FP_ARG_RETURN;
22368
22369 else
22370 {
22371 regno = GP_ARG_RETURN;
22372
22373 /* 32-bit is OK since it'll go in r3/r4. */
22374 if (TARGET_32BIT && inner_bytes >= 4)
22375 return gen_rtx_REG (mode, regno);
22376 }
22377
22378 if (inner_bytes >= 8)
22379 return gen_rtx_REG (mode, regno);
22380
22381 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
22382 const0_rtx);
22383 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
22384 GEN_INT (inner_bytes));
22385 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
22386 }
22387
22388 /* Return an rtx describing a return value of MODE as a PARALLEL
22389 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
22390 stride REG_STRIDE. */
22391
22392 static rtx
22393 rs6000_parallel_return (machine_mode mode,
22394 int n_elts, machine_mode elt_mode,
22395 unsigned int regno, unsigned int reg_stride)
22396 {
22397 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
22398
22399 int i;
22400 for (i = 0; i < n_elts; i++)
22401 {
22402 rtx r = gen_rtx_REG (elt_mode, regno);
22403 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
22404 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
22405 regno += reg_stride;
22406 }
22407
22408 return par;
22409 }
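/* For example (an illustrative sketch of the resulting RTL shape):
   two DFmode elements starting at FP_ARG_RETURN with stride 1 come
   back as

     (parallel [(expr_list (reg:DF f1) (const_int 0))
		(expr_list (reg:DF f2) (const_int 8))])

   i.e. N_ELTS register/offset pairs, each register REG_STRIDE away
   from the previous one.  */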
22410
22411 /* Target hook for TARGET_FUNCTION_VALUE.
22412
22413 An integer value is in r3 and a floating-point value is in fp1,
22414 unless -msoft-float. */
22415
22416 static rtx
22417 rs6000_function_value (const_tree valtype,
22418 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
22419 bool outgoing ATTRIBUTE_UNUSED)
22420 {
22421 machine_mode mode;
22422 unsigned int regno;
22423 machine_mode elt_mode;
22424 int n_elts;
22425
22426 /* Special handling for structs in darwin64. */
22427 if (TARGET_MACHO
22428 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
22429 {
22430 CUMULATIVE_ARGS valcum;
22431 rtx valret;
22432
22433 valcum.words = 0;
22434 valcum.fregno = FP_ARG_MIN_REG;
22435 valcum.vregno = ALTIVEC_ARG_MIN_REG;
22436 /* Do a trial code generation as if this were going to be passed as
22437 an argument; if any part goes in memory, we return NULL. */
22438 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
22439 if (valret)
22440 return valret;
22441 /* Otherwise fall through to standard ABI rules. */
22442 }
22443
22444 mode = TYPE_MODE (valtype);
22445
22446 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
22447 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
22448 {
22449 int first_reg, n_regs;
22450
22451 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
22452 {
22453 /* _Decimal128 must use even/odd register pairs. */
22454 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22455 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
22456 }
22457 else
22458 {
22459 first_reg = ALTIVEC_ARG_RETURN;
22460 n_regs = 1;
22461 }
22462
22463 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
22464 }
22465
22466 /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI. */
22467 if (TARGET_32BIT && TARGET_POWERPC64)
22468 switch (mode)
22469 {
22470 default:
22471 break;
22472 case E_DImode:
22473 case E_SCmode:
22474 case E_DCmode:
22475 case E_TCmode:
22476 int count = GET_MODE_SIZE (mode) / 4;
22477 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
22478 }
22479
22480 if ((INTEGRAL_TYPE_P (valtype)
22481 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
22482 || POINTER_TYPE_P (valtype))
22483 mode = TARGET_32BIT ? SImode : DImode;
22484
22485 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22486 /* _Decimal128 must use an even/odd register pair. */
22487 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22488 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
22489 && !FLOAT128_VECTOR_P (mode))
22490 regno = FP_ARG_RETURN;
22491 else if (TREE_CODE (valtype) == COMPLEX_TYPE
22492 && targetm.calls.split_complex_arg)
22493 return rs6000_complex_function_value (mode);
22494 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22495 return register is used in both cases, and we won't see V2DImode/V2DFmode
22496 for pure altivec, combine the two cases. */
22497 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
22498 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
22499 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22500 regno = ALTIVEC_ARG_RETURN;
22501 else
22502 regno = GP_ARG_RETURN;
22503
22504 return gen_rtx_REG (mode, regno);
22505 }
22506
22507 /* Define how to find the value returned by a library function
22508 assuming the value has mode MODE. */
22509 rtx
22510 rs6000_libcall_value (machine_mode mode)
22511 {
22512 unsigned int regno;
22513
22514 /* A long long return value needs to be split in the -mpowerpc64, 32-bit ABI. */
22515 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
22516 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
22517
22518 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22519 /* _Decimal128 must use an even/odd register pair. */
22520 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22521 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
22522 regno = FP_ARG_RETURN;
22523 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22524 return register is used in both cases, and we won't see V2DImode/V2DFmode
22525 for pure altivec, combine the two cases. */
22526 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
22527 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
22528 regno = ALTIVEC_ARG_RETURN;
22529 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
22530 return rs6000_complex_function_value (mode);
22531 else
22532 regno = GP_ARG_RETURN;
22533
22534 return gen_rtx_REG (mode, regno);
22535 }
22536
22537 /* Compute register pressure classes. We implement the target hook to avoid
22538 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
22539 lead to incorrect estimates of the number of available registers and
22540 therefore increased register pressure and spilling. */
22541 static int
22542 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
22543 {
22544 int n;
22545
22546 n = 0;
22547 pressure_classes[n++] = GENERAL_REGS;
22548 if (TARGET_VSX)
22549 pressure_classes[n++] = VSX_REGS;
22550 else
22551 {
22552 if (TARGET_ALTIVEC)
22553 pressure_classes[n++] = ALTIVEC_REGS;
22554 if (TARGET_HARD_FLOAT)
22555 pressure_classes[n++] = FLOAT_REGS;
22556 }
22557 pressure_classes[n++] = CR_REGS;
22558 pressure_classes[n++] = SPECIAL_REGS;
22559
22560 return n;
22561 }
22562
22563 /* Given FROM and TO register numbers, say whether this elimination is allowed.
22564 Frame pointer elimination is automatically handled.
22565
22566 For the RS/6000, if frame pointer elimination is being done, we would like
22567 to convert ap into fp, not sp.
22568
22569 We need r30 if -mminimal-toc was specified and there are constant pool
22570 references. */
22571
22572 static bool
22573 rs6000_can_eliminate (const int from, const int to)
22574 {
22575 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
22576 ? ! frame_pointer_needed
22577 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
22578 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
22579 || constant_pool_empty_p ()
22580 : true);
22581 }
22582
22583 /* Define the offset between two registers, FROM to be eliminated and its
22584 replacement TO, at the start of a routine. */
22585 HOST_WIDE_INT
22586 rs6000_initial_elimination_offset (int from, int to)
22587 {
22588 rs6000_stack_t *info = rs6000_stack_info ();
22589 HOST_WIDE_INT offset;
22590
22591 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22592 offset = info->push_p ? 0 : -info->total_size;
22593 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22594 {
22595 offset = info->push_p ? 0 : -info->total_size;
22596 if (FRAME_GROWS_DOWNWARD)
22597 offset += info->fixed_size + info->vars_size + info->parm_size;
22598 }
22599 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22600 offset = FRAME_GROWS_DOWNWARD
22601 ? info->fixed_size + info->vars_size + info->parm_size
22602 : 0;
22603 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22604 offset = info->total_size;
22605 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22606 offset = info->push_p ? info->total_size : 0;
22607 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
22608 offset = 0;
22609 else
22610 gcc_unreachable ();
22611
22612 return offset;
22613 }
22614
22615 /* Fill in sizes of registers used by unwinder. */
22616
22617 static void
22618 rs6000_init_dwarf_reg_sizes_extra (tree address)
22619 {
22620 if (TARGET_MACHO && ! TARGET_ALTIVEC)
22621 {
22622 int i;
22623 machine_mode mode = TYPE_MODE (char_type_node);
22624 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
22625 rtx mem = gen_rtx_MEM (BLKmode, addr);
22626 rtx value = gen_int_mode (16, mode);
22627
22628 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
22629 The unwinder still needs to know the size of Altivec registers. */
22630
22631 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
22632 {
22633 int column = DWARF_REG_TO_UNWIND_COLUMN
22634 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
22635 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
22636
22637 emit_move_insn (adjust_address (mem, mode, offset), value);
22638 }
22639 }
22640 }
22641
22642 /* Map internal GCC register numbers to debug format register numbers.
22643 FORMAT specifies the type of debug register number to use:
22644 0 -- debug information, except for frame-related sections
22645 1 -- DWARF .debug_frame section
22646 2 -- DWARF .eh_frame section */
22647
22648 unsigned int
22649 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
22650 {
22651 /* On some platforms, we use the standard DWARF register
22652 numbering for .debug_info and .debug_frame. */
22653 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
22654 {
22655 #ifdef RS6000_USE_DWARF_NUMBERING
22656 if (regno <= 31)
22657 return regno;
22658 if (FP_REGNO_P (regno))
22659 return regno - FIRST_FPR_REGNO + 32;
22660 if (ALTIVEC_REGNO_P (regno))
22661 return regno - FIRST_ALTIVEC_REGNO + 1124;
22662 if (regno == LR_REGNO)
22663 return 108;
22664 if (regno == CTR_REGNO)
22665 return 109;
22666 if (regno == CA_REGNO)
22667 return 101; /* XER */
22668 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
22669 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
22670 The actual code emitted saves the whole of CR, so we map CR2_REGNO
22671 to the DWARF reg for CR. */
22672 if (format == 1 && regno == CR2_REGNO)
22673 return 64;
22674 if (CR_REGNO_P (regno))
22675 return regno - CR0_REGNO + 86;
22676 if (regno == VRSAVE_REGNO)
22677 return 356;
22678 if (regno == VSCR_REGNO)
22679 return 67;
22680
22681 /* These do not make much sense. */
22682 if (regno == FRAME_POINTER_REGNUM)
22683 return 111;
22684 if (regno == ARG_POINTER_REGNUM)
22685 return 67;
22686 if (regno == 64)
22687 return 100;
22688
22689 gcc_unreachable ();
22690 #endif
22691 }
22692
22693 /* We use the GCC 7 (and before) internal number for non-DWARF debug
22694 information, and also for .eh_frame. */
22695 /* Translate the regnos to their numbers in GCC 7 (and before). */
22696 if (regno <= 31)
22697 return regno;
22698 if (FP_REGNO_P (regno))
22699 return regno - FIRST_FPR_REGNO + 32;
22700 if (ALTIVEC_REGNO_P (regno))
22701 return regno - FIRST_ALTIVEC_REGNO + 77;
22702 if (regno == LR_REGNO)
22703 return 65;
22704 if (regno == CTR_REGNO)
22705 return 66;
22706 if (regno == CA_REGNO)
22707 return 76; /* XER */
22708 if (CR_REGNO_P (regno))
22709 return regno - CR0_REGNO + 68;
22710 if (regno == VRSAVE_REGNO)
22711 return 109;
22712 if (regno == VSCR_REGNO)
22713 return 110;
22714
22715 if (regno == FRAME_POINTER_REGNUM)
22716 return 111;
22717 if (regno == ARG_POINTER_REGNUM)
22718 return 67;
22719 if (regno == 64)
22720 return 64;
22721
22722 gcc_unreachable ();
22723 }
22724
22725 /* Target hook for eh_return_filter_mode. */
22726 static scalar_int_mode
22727 rs6000_eh_return_filter_mode (void)
22728 {
22729 return TARGET_32BIT ? SImode : word_mode;
22730 }
22731
22732 /* Target hook for translate_mode_attribute. */
22733 static machine_mode
22734 rs6000_translate_mode_attribute (machine_mode mode)
22735 {
22736 if ((FLOAT128_IEEE_P (mode)
22737 && ieee128_float_type_node == long_double_type_node)
22738 || (FLOAT128_IBM_P (mode)
22739 && ibm128_float_type_node == long_double_type_node))
22740 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
22741 return mode;
22742 }
22743
22744 /* Target hook for scalar_mode_supported_p. */
22745 static bool
22746 rs6000_scalar_mode_supported_p (scalar_mode mode)
22747 {
22748 /* -m32 does not support TImode. This is the default, from
22749 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
22750 same ABI as for -m32. But default_scalar_mode_supported_p allows
22751 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
22752 for -mpowerpc64. */
22753 if (TARGET_32BIT && mode == TImode)
22754 return false;
22755
22756 if (DECIMAL_FLOAT_MODE_P (mode))
22757 return default_decimal_float_supported_p ();
22758 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
22759 return true;
22760 else
22761 return default_scalar_mode_supported_p (mode);
22762 }
22763
22764 /* Target hook for vector_mode_supported_p. */
22765 static bool
22766 rs6000_vector_mode_supported_p (machine_mode mode)
22767 {
22768 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
22769 128-bit, the compiler might try to widen IEEE 128-bit to IBM
22770 double-double. */
22771 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
22772 return true;
22773
22774 else
22775 return false;
22776 }
22777
22778 /* Target hook for floatn_mode. */
22779 static opt_scalar_float_mode
22780 rs6000_floatn_mode (int n, bool extended)
22781 {
22782 if (extended)
22783 {
22784 switch (n)
22785 {
22786 case 32:
22787 return DFmode;
22788
22789 case 64:
22790 if (TARGET_FLOAT128_TYPE)
22791 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22792 else
22793 return opt_scalar_float_mode ();
22794
22795 case 128:
22796 return opt_scalar_float_mode ();
22797
22798 default:
22799 /* Those are the only valid _FloatNx types. */
22800 gcc_unreachable ();
22801 }
22802 }
22803 else
22804 {
22805 switch (n)
22806 {
22807 case 32:
22808 return SFmode;
22809
22810 case 64:
22811 return DFmode;
22812
22813 case 128:
22814 if (TARGET_FLOAT128_TYPE)
22815 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22816 else
22817 return opt_scalar_float_mode ();
22818
22819 default:
22820 return opt_scalar_float_mode ();
22821 }
22822 }
22823
22824 }
22825
22826 /* Target hook for c_mode_for_suffix. */
22827 static machine_mode
22828 rs6000_c_mode_for_suffix (char suffix)
22829 {
22830 if (TARGET_FLOAT128_TYPE)
22831 {
22832 if (suffix == 'q' || suffix == 'Q')
22833 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22834
22835 /* At the moment, we are not defining a suffix for IBM extended double.
22836 If/when the default for -mabi=ieeelongdouble is changed, and we want
22837 to support __ibm128 constants in legacy library code, we may need to
22838 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
22839 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
22840 __float80 constants. */
22841 }
22842
22843 return VOIDmode;
22844 }
22845
22846 /* Target hook for invalid_arg_for_unprototyped_fn. */
22847 static const char *
22848 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
22849 {
22850 return (!rs6000_darwin64_abi
22851 && typelist == 0
22852 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
22853 && (funcdecl == NULL_TREE
22854 || (TREE_CODE (funcdecl) == FUNCTION_DECL
22855 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
22856 ? N_("AltiVec argument passed to unprototyped function")
22857 : NULL;
22858 }
22859
22860 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
22861 setup by using __stack_chk_fail_local hidden function instead of
22862 calling __stack_chk_fail directly. Otherwise it is better to call
22863 __stack_chk_fail directly. */
22864
22865 static tree ATTRIBUTE_UNUSED
22866 rs6000_stack_protect_fail (void)
22867 {
22868 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
22869 ? default_hidden_stack_protect_fail ()
22870 : default_external_stack_protect_fail ();
22871 }
22872
22873 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
22874
22875 #if TARGET_ELF
22876 static unsigned HOST_WIDE_INT
22877 rs6000_asan_shadow_offset (void)
22878 {
22879 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
22880 }
22881 #endif
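/* A sketch of how this offset is consumed (illustrative; the mapping
   itself is implemented by the compiler-inserted instrumentation and
   libsanitizer, not here): AddressSanitizer maps every 8 bytes of
   application memory to 1 shadow byte at

     shadow = (address >> 3) + rs6000_asan_shadow_offset ()

   so the shadow region starts at 1 << 41 on 64-bit and 1 << 29 on
   32-bit PowerPC ELF targets.  */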
22882 \f
22883 /* Mask options that we want to support inside of attribute((target)) and
22884 #pragma GCC target operations. Note, we do not include things like
22885 64/32-bit, endianness, hard/soft floating point, etc. that would have
22886 different calling sequences. */
22887
22888 struct rs6000_opt_mask {
22889 const char *name; /* option name */
22890 HOST_WIDE_INT mask; /* mask to set */
22891 bool invert; /* invert sense of mask */
22892 bool valid_target; /* option is a target option */
22893 };
22894
22895 static struct rs6000_opt_mask const rs6000_opt_masks[] =
22896 {
22897 { "altivec", OPTION_MASK_ALTIVEC, false, true },
22898 { "cmpb", OPTION_MASK_CMPB, false, true },
22899 { "crypto", OPTION_MASK_CRYPTO, false, true },
22900 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
22901 { "dlmzb", OPTION_MASK_DLMZB, false, true },
22902 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
22903 false, true },
22904 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
22905 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
22906 { "fprnd", OPTION_MASK_FPRND, false, true },
22907 { "future", OPTION_MASK_FUTURE, false, true },
22908 { "hard-dfp", OPTION_MASK_DFP, false, true },
22909 { "htm", OPTION_MASK_HTM, false, true },
22910 { "isel", OPTION_MASK_ISEL, false, true },
22911 { "mfcrf", OPTION_MASK_MFCRF, false, true },
22912 { "mfpgpr", 0, false, true },
22913 { "modulo", OPTION_MASK_MODULO, false, true },
22914 { "mulhw", OPTION_MASK_MULHW, false, true },
22915 { "multiple", OPTION_MASK_MULTIPLE, false, true },
22916 { "pcrel", OPTION_MASK_PCREL, false, true },
22917 { "popcntb", OPTION_MASK_POPCNTB, false, true },
22918 { "popcntd", OPTION_MASK_POPCNTD, false, true },
22919 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
22920 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
22921 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
22922 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
22923 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
22924 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
22925 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
22926 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
22927 { "prefixed", OPTION_MASK_PREFIXED, false, true },
22928 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
22929 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
22930 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
22931 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
22932 { "string", 0, false, true },
22933 { "update", OPTION_MASK_NO_UPDATE, true, true },
22934 { "vsx", OPTION_MASK_VSX, false, true },
22935 #ifdef OPTION_MASK_64BIT
22936 #if TARGET_AIX_OS
22937 { "aix64", OPTION_MASK_64BIT, false, false },
22938 { "aix32", OPTION_MASK_64BIT, true, false },
22939 #else
22940 { "64", OPTION_MASK_64BIT, false, false },
22941 { "32", OPTION_MASK_64BIT, true, false },
22942 #endif
22943 #endif
22944 #ifdef OPTION_MASK_EABI
22945 { "eabi", OPTION_MASK_EABI, false, false },
22946 #endif
22947 #ifdef OPTION_MASK_LITTLE_ENDIAN
22948 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
22949 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
22950 #endif
22951 #ifdef OPTION_MASK_RELOCATABLE
22952 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
22953 #endif
22954 #ifdef OPTION_MASK_STRICT_ALIGN
22955 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
22956 #endif
22957 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
22958 { "string", 0, false, false },
22959 };
22960
22961 /* Builtin mask mapping for printing the flags. */
22962 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
22963 {
22964 { "altivec", RS6000_BTM_ALTIVEC, false, false },
22965 { "vsx", RS6000_BTM_VSX, false, false },
22966 { "fre", RS6000_BTM_FRE, false, false },
22967 { "fres", RS6000_BTM_FRES, false, false },
22968 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
22969 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
22970 { "popcntd", RS6000_BTM_POPCNTD, false, false },
22971 { "cell", RS6000_BTM_CELL, false, false },
22972 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
22973 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
22974 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
22975 { "crypto", RS6000_BTM_CRYPTO, false, false },
22976 { "htm", RS6000_BTM_HTM, false, false },
22977 { "hard-dfp", RS6000_BTM_DFP, false, false },
22978 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
22979 { "long-double-128", RS6000_BTM_LDBL128, false, false },
22980 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
22981 { "float128", RS6000_BTM_FLOAT128, false, false },
22982 { "float128-hw", RS6000_BTM_FLOAT128_HW, false, false },
22983 };
22984
22985 /* Option variables that we want to support inside attribute((target)) and
22986 #pragma GCC target operations. */
22987
22988 struct rs6000_opt_var {
22989 const char *name; /* option name */
22990 size_t global_offset; /* offset of the option in global_options. */
22991 size_t target_offset; /* offset of the option in target options. */
22992 };
22993
22994 static struct rs6000_opt_var const rs6000_opt_vars[] =
22995 {
22996 { "friz",
22997 offsetof (struct gcc_options, x_TARGET_FRIZ),
22998 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
22999 { "avoid-indexed-addresses",
23000 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
23001 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
23002 { "longcall",
23003 offsetof (struct gcc_options, x_rs6000_default_long_calls),
23004 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
23005 { "optimize-swaps",
23006 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
23007 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
23008 { "allow-movmisalign",
23009 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
23010 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
23011 { "sched-groups",
23012 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
23013 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
23014 { "always-hint",
23015 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
23016 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
23017 { "align-branch-targets",
23018 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
23019 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
23020 { "sched-prolog",
23021 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23022 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23023 { "sched-epilog",
23024 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23025 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23026 { "speculate-indirect-jumps",
23027 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
23028 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
23029 };
23030
23031 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
23032 parsing. Return true if there were no errors. */
23033
23034 static bool
23035 rs6000_inner_target_options (tree args, bool attr_p)
23036 {
23037 bool ret = true;
23038
23039 if (args == NULL_TREE)
23040 ;
23041
23042 else if (TREE_CODE (args) == STRING_CST)
23043 {
23044 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23045 char *q;
23046
23047 while ((q = strtok (p, ",")) != NULL)
23048 {
23049 bool error_p = false;
23050 bool not_valid_p = false;
23051 const char *cpu_opt = NULL;
23052
23053 p = NULL;
23054 if (strncmp (q, "cpu=", 4) == 0)
23055 {
23056 int cpu_index = rs6000_cpu_name_lookup (q+4);
23057 if (cpu_index >= 0)
23058 rs6000_cpu_index = cpu_index;
23059 else
23060 {
23061 error_p = true;
23062 cpu_opt = q+4;
23063 }
23064 }
23065 else if (strncmp (q, "tune=", 5) == 0)
23066 {
23067 int tune_index = rs6000_cpu_name_lookup (q+5);
23068 if (tune_index >= 0)
23069 rs6000_tune_index = tune_index;
23070 else
23071 {
23072 error_p = true;
23073 cpu_opt = q+5;
23074 }
23075 }
23076 else
23077 {
23078 size_t i;
23079 bool invert = false;
23080 char *r = q;
23081
23082 error_p = true;
23083 if (strncmp (r, "no-", 3) == 0)
23084 {
23085 invert = true;
23086 r += 3;
23087 }
23088
23089 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
23090 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
23091 {
23092 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
23093
23094 if (!rs6000_opt_masks[i].valid_target)
23095 not_valid_p = true;
23096 else
23097 {
23098 error_p = false;
23099 rs6000_isa_flags_explicit |= mask;
23100
23101 /* VSX needs altivec, so -mvsx automagically sets
23102 altivec and disables -mavoid-indexed-addresses. */
23103 if (!invert)
23104 {
23105 if (mask == OPTION_MASK_VSX)
23106 {
23107 mask |= OPTION_MASK_ALTIVEC;
23108 TARGET_AVOID_XFORM = 0;
23109 }
23110 }
23111
23112 if (rs6000_opt_masks[i].invert)
23113 invert = !invert;
23114
23115 if (invert)
23116 rs6000_isa_flags &= ~mask;
23117 else
23118 rs6000_isa_flags |= mask;
23119 }
23120 break;
23121 }
23122
23123 if (error_p && !not_valid_p)
23124 {
23125 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
23126 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
23127 {
23128 size_t j = rs6000_opt_vars[i].global_offset;
23129 *((int *) ((char *)&global_options + j)) = !invert;
23130 error_p = false;
23131 not_valid_p = false;
23132 break;
23133 }
23134 }
23135 }
23136
23137 if (error_p)
23138 {
23139 const char *eprefix, *esuffix;
23140
23141 ret = false;
23142 if (attr_p)
23143 {
23144 eprefix = "__attribute__((__target__(";
23145 esuffix = ")))";
23146 }
23147 else
23148 {
23149 eprefix = "#pragma GCC target ";
23150 esuffix = "";
23151 }
23152
23153 if (cpu_opt)
23154 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
23155 q, esuffix);
23156 else if (not_valid_p)
23157 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
23158 else
23159 error ("%s%qs%s is invalid", eprefix, q, esuffix);
23160 }
23161 }
23162 }
23163
23164 else if (TREE_CODE (args) == TREE_LIST)
23165 {
23166 do
23167 {
23168 tree value = TREE_VALUE (args);
23169 if (value)
23170 {
23171 bool ret2 = rs6000_inner_target_options (value, attr_p);
23172 if (!ret2)
23173 ret = false;
23174 }
23175 args = TREE_CHAIN (args);
23176 }
23177 while (args != NULL_TREE);
23178 }
23179
23180 else
23181 {
23182 error ("attribute %<target%> argument not a string");
23183 return false;
23184 }
23185
23186 return ret;
23187 }
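/* For reference, a user-level sketch of the strings this parser
   accepts (illustrative; the names come from the option tables above):

     #pragma GCC target ("cpu=power9,vsx")
     void fast_path (void);

     __attribute__ ((__target__ ("no-vsx,avoid-indexed-addresses")))
     void baseline_path (void);

   Each comma-separated entry is a cpu= or tune= setting, a mask name
   from rs6000_opt_masks (optionally prefixed with "no-"), or a
   variable name from rs6000_opt_vars.  */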
23188
23189 /* Print out the target options as a list for -mdebug=target. */
23190
23191 static void
23192 rs6000_debug_target_options (tree args, const char *prefix)
23193 {
23194 if (args == NULL_TREE)
23195 fprintf (stderr, "%s<NULL>", prefix);
23196
23197 else if (TREE_CODE (args) == STRING_CST)
23198 {
23199 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23200 char *q;
23201
23202 while ((q = strtok (p, ",")) != NULL)
23203 {
23204 p = NULL;
23205 fprintf (stderr, "%s\"%s\"", prefix, q);
23206 prefix = ", ";
23207 }
23208 }
23209
23210 else if (TREE_CODE (args) == TREE_LIST)
23211 {
23212 do
23213 {
23214 tree value = TREE_VALUE (args);
23215 if (value)
23216 {
23217 rs6000_debug_target_options (value, prefix);
23218 prefix = ", ";
23219 }
23220 args = TREE_CHAIN (args);
23221 }
23222 while (args != NULL_TREE);
23223 }
23224
23225 else
23226 gcc_unreachable ();
23227
23228 return;
23229 }
23230
23231 \f
23232 /* Hook to validate attribute((target("..."))). */
23233
23234 static bool
23235 rs6000_valid_attribute_p (tree fndecl,
23236 tree ARG_UNUSED (name),
23237 tree args,
23238 int flags)
23239 {
23240 struct cl_target_option cur_target;
23241 bool ret;
23242 tree old_optimize;
23243 tree new_target, new_optimize;
23244 tree func_optimize;
23245
23246 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
23247
23248 if (TARGET_DEBUG_TARGET)
23249 {
23250 tree tname = DECL_NAME (fndecl);
23251 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
23252 if (tname)
23253 fprintf (stderr, "function: %.*s\n",
23254 (int) IDENTIFIER_LENGTH (tname),
23255 IDENTIFIER_POINTER (tname));
23256 else
23257 fprintf (stderr, "function: unknown\n");
23258
23259 fprintf (stderr, "args:");
23260 rs6000_debug_target_options (args, " ");
23261 fprintf (stderr, "\n");
23262
23263 if (flags)
23264 fprintf (stderr, "flags: 0x%x\n", flags);
23265
23266 fprintf (stderr, "--------------------\n");
23267 }
23268
23269 /* attribute((target("default"))) does nothing, beyond
23270 affecting multi-versioning. */
23271 if (TREE_VALUE (args)
23272 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
23273 && TREE_CHAIN (args) == NULL_TREE
23274 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
23275 return true;
23276
23277 old_optimize = build_optimization_node (&global_options);
23278 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
23279
23280 /* If the function changed the optimization levels as well as setting target
23281 options, start with the optimizations specified. */
23282 if (func_optimize && func_optimize != old_optimize)
23283 cl_optimization_restore (&global_options,
23284 TREE_OPTIMIZATION (func_optimize));
23285
23286 /* The target attributes may also change some optimization flags, so update
23287 the optimization options if necessary. */
23288 cl_target_option_save (&cur_target, &global_options);
23289 rs6000_cpu_index = rs6000_tune_index = -1;
23290 ret = rs6000_inner_target_options (args, true);
23291
23292 /* Set up any additional state. */
23293 if (ret)
23294 {
23295 ret = rs6000_option_override_internal (false);
23296 new_target = build_target_option_node (&global_options);
23297 }
23298 else
23299 new_target = NULL;
23300
23301 new_optimize = build_optimization_node (&global_options);
23302
23303 if (!new_target)
23304 ret = false;
23305
23306 else if (fndecl)
23307 {
23308 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
23309
23310 if (old_optimize != new_optimize)
23311 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
23312 }
23313
23314 cl_target_option_restore (&global_options, &cur_target);
23315
23316 if (old_optimize != new_optimize)
23317 cl_optimization_restore (&global_options,
23318 TREE_OPTIMIZATION (old_optimize));
23319
23320 return ret;
23321 }
23322
23323 \f
23324 /* Hook to validate the current #pragma GCC target and set the state, and
23325 update the macros based on what was changed. If ARGS is NULL, then
23326 POP_TARGET is used to reset the options. */
23327
23328 bool
23329 rs6000_pragma_target_parse (tree args, tree pop_target)
23330 {
23331 tree prev_tree = build_target_option_node (&global_options);
23332 tree cur_tree;
23333 struct cl_target_option *prev_opt, *cur_opt;
23334 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
23335 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
23336
23337 if (TARGET_DEBUG_TARGET)
23338 {
23339 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
23340 fprintf (stderr, "args:");
23341 rs6000_debug_target_options (args, " ");
23342 fprintf (stderr, "\n");
23343
23344 if (pop_target)
23345 {
23346 fprintf (stderr, "pop_target:\n");
23347 debug_tree (pop_target);
23348 }
23349 else
23350 fprintf (stderr, "pop_target: <NULL>\n");
23351
23352 fprintf (stderr, "--------------------\n");
23353 }
23354
23355 if (! args)
23356 {
23357 cur_tree = ((pop_target)
23358 ? pop_target
23359 : target_option_default_node);
23360 cl_target_option_restore (&global_options,
23361 TREE_TARGET_OPTION (cur_tree));
23362 }
23363 else
23364 {
23365 rs6000_cpu_index = rs6000_tune_index = -1;
23366 if (!rs6000_inner_target_options (args, false)
23367 || !rs6000_option_override_internal (false)
23368 || (cur_tree = build_target_option_node (&global_options))
23369 == NULL_TREE)
23370 {
23371 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
23372 fprintf (stderr, "invalid pragma\n");
23373
23374 return false;
23375 }
23376 }
23377
23378 target_option_current_node = cur_tree;
23379 rs6000_activate_target_options (target_option_current_node);
23380
23381 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
23382 change the macros that are defined. */
23383 if (rs6000_target_modify_macros_ptr)
23384 {
23385 prev_opt = TREE_TARGET_OPTION (prev_tree);
23386 prev_bumask = prev_opt->x_rs6000_builtin_mask;
23387 prev_flags = prev_opt->x_rs6000_isa_flags;
23388
23389 cur_opt = TREE_TARGET_OPTION (cur_tree);
23390 cur_flags = cur_opt->x_rs6000_isa_flags;
23391 cur_bumask = cur_opt->x_rs6000_builtin_mask;
23392
23393 diff_bumask = (prev_bumask ^ cur_bumask);
23394 diff_flags = (prev_flags ^ cur_flags);
23395
23396 if ((diff_flags != 0) || (diff_bumask != 0))
23397 {
23398 /* Delete old macros. */
23399 rs6000_target_modify_macros_ptr (false,
23400 prev_flags & diff_flags,
23401 prev_bumask & diff_bumask);
23402
23403 /* Define new macros. */
23404 rs6000_target_modify_macros_ptr (true,
23405 cur_flags & diff_flags,
23406 cur_bumask & diff_bumask);
23407 }
23408 }
23409
23410 return true;
23411 }
23412
23413 \f
23414 /* Remember the last target of rs6000_set_current_function. */
23415 static GTY(()) tree rs6000_previous_fndecl;
23416
23417 /* Restore target's globals from NEW_TREE and invalidate the
23418 rs6000_previous_fndecl cache. */
23419
23420 void
23421 rs6000_activate_target_options (tree new_tree)
23422 {
23423 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
23424 if (TREE_TARGET_GLOBALS (new_tree))
23425 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
23426 else if (new_tree == target_option_default_node)
23427 restore_target_globals (&default_target_globals);
23428 else
23429 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
23430 rs6000_previous_fndecl = NULL_TREE;
23431 }
23432
23433 /* Establish appropriate back-end context for processing the function
23434 FNDECL. The argument might be NULL to indicate processing at top
23435 level, outside of any function scope. */
23436 static void
23437 rs6000_set_current_function (tree fndecl)
23438 {
23439 if (TARGET_DEBUG_TARGET)
23440 {
23441 fprintf (stderr, "\n==================== rs6000_set_current_function");
23442
23443 if (fndecl)
23444 fprintf (stderr, ", fndecl %s (%p)",
23445 (DECL_NAME (fndecl)
23446 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
23447 : "<unknown>"), (void *)fndecl);
23448
23449 if (rs6000_previous_fndecl)
23450 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
23451
23452 fprintf (stderr, "\n");
23453 }
23454
23455 /* Only change the context if the function changes. This hook is called
23456 several times in the course of compiling a function, and we don't want to
23457 slow things down too much or call target_reinit when it isn't safe. */
23458 if (fndecl == rs6000_previous_fndecl)
23459 return;
23460
23461 tree old_tree;
23462 if (rs6000_previous_fndecl == NULL_TREE)
23463 old_tree = target_option_current_node;
23464 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
23465 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
23466 else
23467 old_tree = target_option_default_node;
23468
23469 tree new_tree;
23470 if (fndecl == NULL_TREE)
23471 {
23472 if (old_tree != target_option_current_node)
23473 new_tree = target_option_current_node;
23474 else
23475 new_tree = NULL_TREE;
23476 }
23477 else
23478 {
23479 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23480 if (new_tree == NULL_TREE)
23481 new_tree = target_option_default_node;
23482 }
23483
23484 if (TARGET_DEBUG_TARGET)
23485 {
23486 if (new_tree)
23487 {
23488 fprintf (stderr, "\nnew fndecl target specific options:\n");
23489 debug_tree (new_tree);
23490 }
23491
23492 if (old_tree)
23493 {
23494 fprintf (stderr, "\nold fndecl target specific options:\n");
23495 debug_tree (old_tree);
23496 }
23497
23498 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
23499 fprintf (stderr, "--------------------\n");
23500 }
23501
23502 if (new_tree && old_tree != new_tree)
23503 rs6000_activate_target_options (new_tree);
23504
23505 if (fndecl)
23506 rs6000_previous_fndecl = fndecl;
23507 }
23508
23509 \f
23510 /* Save the current options */
23511
23512 static void
23513 rs6000_function_specific_save (struct cl_target_option *ptr,
23514 struct gcc_options *opts)
23515 {
23516 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
23517 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
23518 }
23519
23520 /* Restore the current options */
23521
23522 static void
23523 rs6000_function_specific_restore (struct gcc_options *opts,
23524 struct cl_target_option *ptr)
23525
23526 {
23527 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
23528 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
23529 (void) rs6000_option_override_internal (false);
23530 }
23531
23532 /* Print the current options */
23533
23534 static void
23535 rs6000_function_specific_print (FILE *file, int indent,
23536 struct cl_target_option *ptr)
23537 {
23538 rs6000_print_isa_options (file, indent, "Isa options set",
23539 ptr->x_rs6000_isa_flags);
23540
23541 rs6000_print_isa_options (file, indent, "Isa options explicit",
23542 ptr->x_rs6000_isa_flags_explicit);
23543 }
23544
23545 /* Helper function to print the current isa or misc options on a line. */
23546
23547 static void
23548 rs6000_print_options_internal (FILE *file,
23549 int indent,
23550 const char *string,
23551 HOST_WIDE_INT flags,
23552 const char *prefix,
23553 const struct rs6000_opt_mask *opts,
23554 size_t num_elements)
23555 {
23556 size_t i;
23557 size_t start_column = 0;
23558 size_t cur_column;
23559 size_t max_column = 120;
23560 size_t prefix_len = strlen (prefix);
23561 size_t comma_len = 0;
23562 const char *comma = "";
23563
23564 if (indent)
23565 start_column += fprintf (file, "%*s", indent, "");
23566
23567 if (!flags)
23568 {
23569 fprintf (file, DEBUG_FMT_S, string, "<none>");
23570 return;
23571 }
23572
23573 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
23574
23575 /* Print the various mask options. */
23576 cur_column = start_column;
23577 for (i = 0; i < num_elements; i++)
23578 {
23579 bool invert = opts[i].invert;
23580 const char *name = opts[i].name;
23581 const char *no_str = "";
23582 HOST_WIDE_INT mask = opts[i].mask;
23583 size_t len = comma_len + prefix_len + strlen (name);
23584
23585 if (!invert)
23586 {
23587 if ((flags & mask) == 0)
23588 {
23589 no_str = "no-";
23590 len += strlen ("no-");
23591 }
23592
23593 flags &= ~mask;
23594 }
23595
23596 else
23597 {
23598 if ((flags & mask) != 0)
23599 {
23600 no_str = "no-";
23601 len += strlen ("no-");
23602 }
23603
23604 flags |= mask;
23605 }
23606
23607 cur_column += len;
23608 if (cur_column > max_column)
23609 {
23610 fprintf (file, ", \\\n%*s", (int)start_column, "");
23611 cur_column = start_column + len;
23612 comma = "";
23613 }
23614
23615 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
23616 comma = ", ";
23617 comma_len = strlen (", ");
23618 }
23619
23620 fputs ("\n", file);
23621 }
23622
23623 /* Helper function to print the current isa options on a line. */
23624
23625 static void
23626 rs6000_print_isa_options (FILE *file, int indent, const char *string,
23627 HOST_WIDE_INT flags)
23628 {
23629 rs6000_print_options_internal (file, indent, string, flags, "-m",
23630 &rs6000_opt_masks[0],
23631 ARRAY_SIZE (rs6000_opt_masks));
23632 }
23633
23634 static void
23635 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
23636 HOST_WIDE_INT flags)
23637 {
23638 rs6000_print_options_internal (file, indent, string, flags, "",
23639 &rs6000_builtin_mask_names[0],
23640 ARRAY_SIZE (rs6000_builtin_mask_names));
23641 }
23642
23643 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
23644 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
23645 -mupper-regs-df, etc.).
23646
23647 If the user used -mno-power8-vector, we need to turn off all of the implicit
23648 ISA 2.07 and 3.0 options that relate to the vector unit.
23649
23650 If the user used -mno-power9-vector, we need to turn off all of the implicit
23651 ISA 3.0 options that relate to the vector unit.
23652
23653 This function does not handle explicit options such as the user specifying
23654 -mdirect-move. These are handled in rs6000_option_override_internal, and
23655 the appropriate error is given if needed.
23656
23657 We return a mask of all of the implicit options that should not be enabled
23658 by default. */
23659
23660 static HOST_WIDE_INT
23661 rs6000_disable_incompatible_switches (void)
23662 {
23663 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
23664 size_t i, j;
23665
23666 static const struct {
23667 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
23668 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
23669 const char *const name; /* name of the switch. */
23670 } flags[] = {
23671 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
23672 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
23673 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
23674 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
23675 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
23676 };
23677
23678 for (i = 0; i < ARRAY_SIZE (flags); i++)
23679 {
23680 HOST_WIDE_INT no_flag = flags[i].no_flag;
23681
23682 if ((rs6000_isa_flags & no_flag) == 0
23683 && (rs6000_isa_flags_explicit & no_flag) != 0)
23684 {
23685 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
23686 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
23687 & rs6000_isa_flags
23688 & dep_flags);
23689
23690 if (set_flags)
23691 {
23692 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
23693 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
23694 {
23695 set_flags &= ~rs6000_opt_masks[j].mask;
23696 error ("%<-mno-%s%> turns off %<-m%s%>",
23697 flags[i].name,
23698 rs6000_opt_masks[j].name);
23699 }
23700
23701 gcc_assert (!set_flags);
23702 }
23703
23704 rs6000_isa_flags &= ~dep_flags;
23705 ignore_masks |= no_flag | dep_flags;
23706 }
23707 }
23708
23709 return ignore_masks;
23710 }
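/* Illustrative behaviour (assumed command line): given
   "-mcpu=power9 -mno-vsx -mpower9-vector", the loop above reports
   "-mno-vsx turns off -mpower9-vector", clears the dependent flags, and
   returns a mask that keeps the implicit VSX-dependent options from being
   re-enabled by default.  */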
23711
23712 \f
23713 /* Helper function for printing the function name when debugging. */
23714
23715 static const char *
23716 get_decl_name (tree fn)
23717 {
23718 tree name;
23719
23720 if (!fn)
23721 return "<null>";
23722
23723 name = DECL_NAME (fn);
23724 if (!name)
23725 return "<no-name>";
23726
23727 return IDENTIFIER_POINTER (name);
23728 }
23729
23730 /* Return the clone id of the target we are compiling code for in a target
23731 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
23732 the priority list for the target clones (ordered from lowest to
23733 highest). */
23734
23735 static int
23736 rs6000_clone_priority (tree fndecl)
23737 {
23738 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23739 HOST_WIDE_INT isa_masks;
23740 int ret = CLONE_DEFAULT;
23741 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
23742 const char *attrs_str = NULL;
23743
23744 attrs = TREE_VALUE (TREE_VALUE (attrs));
23745 attrs_str = TREE_STRING_POINTER (attrs);
23746
23747 /* Return priority zero for default function. Return the ISA needed for the
23748 function if it is not the default. */
23749 if (strcmp (attrs_str, "default") != 0)
23750 {
23751 if (fn_opts == NULL_TREE)
23752 fn_opts = target_option_default_node;
23753
23754 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
23755 isa_masks = rs6000_isa_flags;
23756 else
23757 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
23758
23759 for (ret = CLONE_MAX - 1; ret != 0; ret--)
23760 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
23761 break;
23762 }
23763
23764 if (TARGET_DEBUG_TARGET)
23765 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
23766 get_decl_name (fndecl), ret);
23767
23768 return ret;
23769 }
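/* Illustrative usage (assumed, not from this file): the priorities above
   order the versions created by a declaration such as

     __attribute__ ((target_clones ("cpu=power9", "default")))
     long mult (long a) { return a * 2; }

   The "default" clone gets CLONE_DEFAULT (priority 0) and the power9 clone
   gets the higher priority that rs6000_clone_map assigns to its ISA.  */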
23770
23771 /* This compares the priority of target features in function DECL1 and DECL2.
23772 It returns positive value if DECL1 is higher priority, negative value if
23773 DECL2 is higher priority and 0 if they are the same. Note, priorities are
23774 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
23775
23776 static int
23777 rs6000_compare_version_priority (tree decl1, tree decl2)
23778 {
23779 int priority1 = rs6000_clone_priority (decl1);
23780 int priority2 = rs6000_clone_priority (decl2);
23781 int ret = priority1 - priority2;
23782
23783 if (TARGET_DEBUG_TARGET)
23784 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
23785 get_decl_name (decl1), get_decl_name (decl2), ret);
23786
23787 return ret;
23788 }
23789
23790 /* Make a dispatcher declaration for the multi-versioned function DECL.
23791 Calls to DECL function will be replaced with calls to the dispatcher
23792 by the front-end. Returns the decl of the dispatcher function. */
23793
23794 static tree
23795 rs6000_get_function_versions_dispatcher (void *decl)
23796 {
23797 tree fn = (tree) decl;
23798 struct cgraph_node *node = NULL;
23799 struct cgraph_node *default_node = NULL;
23800 struct cgraph_function_version_info *node_v = NULL;
23801 struct cgraph_function_version_info *first_v = NULL;
23802
23803 tree dispatch_decl = NULL;
23804
23805 struct cgraph_function_version_info *default_version_info = NULL;
23806 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
23807
23808 if (TARGET_DEBUG_TARGET)
23809 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
23810 get_decl_name (fn));
23811
23812 node = cgraph_node::get (fn);
23813 gcc_assert (node != NULL);
23814
23815 node_v = node->function_version ();
23816 gcc_assert (node_v != NULL);
23817
23818 if (node_v->dispatcher_resolver != NULL)
23819 return node_v->dispatcher_resolver;
23820
23821 /* Find the default version and make it the first node. */
23822 first_v = node_v;
23823 /* Go to the beginning of the chain. */
23824 while (first_v->prev != NULL)
23825 first_v = first_v->prev;
23826
23827 default_version_info = first_v;
23828 while (default_version_info != NULL)
23829 {
23830 const tree decl2 = default_version_info->this_node->decl;
23831 if (is_function_default_version (decl2))
23832 break;
23833 default_version_info = default_version_info->next;
23834 }
23835
23836 /* If there is no default node, just return NULL. */
23837 if (default_version_info == NULL)
23838 return NULL;
23839
23840 /* Make default info the first node. */
23841 if (first_v != default_version_info)
23842 {
23843 default_version_info->prev->next = default_version_info->next;
23844 if (default_version_info->next)
23845 default_version_info->next->prev = default_version_info->prev;
23846 first_v->prev = default_version_info;
23847 default_version_info->next = first_v;
23848 default_version_info->prev = NULL;
23849 }
23850
23851 default_node = default_version_info->this_node;
23852
23853 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
23854 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23855 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
23856 "exports hardware capability bits");
23857 #else
23858
23859 if (targetm.has_ifunc_p ())
23860 {
23861 struct cgraph_function_version_info *it_v = NULL;
23862 struct cgraph_node *dispatcher_node = NULL;
23863 struct cgraph_function_version_info *dispatcher_version_info = NULL;
23864
23865 /* Right now, the dispatching is done via ifunc. */
23866 dispatch_decl = make_dispatcher_decl (default_node->decl);
23867
23868 dispatcher_node = cgraph_node::get_create (dispatch_decl);
23869 gcc_assert (dispatcher_node != NULL);
23870 dispatcher_node->dispatcher_function = 1;
23871 dispatcher_version_info
23872 = dispatcher_node->insert_new_function_version ();
23873 dispatcher_version_info->next = default_version_info;
23874 dispatcher_node->definition = 1;
23875
23876 /* Set the dispatcher for all the versions. */
23877 it_v = default_version_info;
23878 while (it_v != NULL)
23879 {
23880 it_v->dispatcher_resolver = dispatch_decl;
23881 it_v = it_v->next;
23882 }
23883 }
23884 else
23885 {
23886 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23887 "multiversioning needs ifunc which is not supported "
23888 "on this target");
23889 }
23890 #endif
23891
23892 return dispatch_decl;
23893 }
23894
23895 /* Make the resolver function decl to dispatch the versions of a multi-
23896 versioned function, DEFAULT_DECL. Create an empty basic block in the
23897 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
23898 function. */
23899
23900 static tree
23901 make_resolver_func (const tree default_decl,
23902 const tree dispatch_decl,
23903 basic_block *empty_bb)
23904 {
23905 /* Make the resolver function static. The resolver function returns
23906 void *. */
23907 tree decl_name = clone_function_name (default_decl, "resolver");
23908 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
23909 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
23910 tree decl = build_fn_decl (resolver_name, type);
23911 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
23912
23913 DECL_NAME (decl) = decl_name;
23914 TREE_USED (decl) = 1;
23915 DECL_ARTIFICIAL (decl) = 1;
23916 DECL_IGNORED_P (decl) = 0;
23917 TREE_PUBLIC (decl) = 0;
23918 DECL_UNINLINABLE (decl) = 1;
23919
23920 /* Resolver is not external, body is generated. */
23921 DECL_EXTERNAL (decl) = 0;
23922 DECL_EXTERNAL (dispatch_decl) = 0;
23923
23924 DECL_CONTEXT (decl) = NULL_TREE;
23925 DECL_INITIAL (decl) = make_node (BLOCK);
23926 DECL_STATIC_CONSTRUCTOR (decl) = 0;
23927
23928 if (DECL_COMDAT_GROUP (default_decl)
23929 || TREE_PUBLIC (default_decl))
23930 {
23931 /* In this case, each translation unit with a call to this
23932 versioned function will put out a resolver. Ensure it
23933 is comdat to keep just one copy. */
23934 DECL_COMDAT (decl) = 1;
23935 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
23936 }
23937 else
23938 TREE_PUBLIC (dispatch_decl) = 0;
23939
23940 /* Build result decl and add to function_decl. */
23941 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
23942 DECL_CONTEXT (t) = decl;
23943 DECL_ARTIFICIAL (t) = 1;
23944 DECL_IGNORED_P (t) = 1;
23945 DECL_RESULT (decl) = t;
23946
23947 gimplify_function_tree (decl);
23948 push_cfun (DECL_STRUCT_FUNCTION (decl));
23949 *empty_bb = init_lowered_empty_function (decl, false,
23950 profile_count::uninitialized ());
23951
23952 cgraph_node::add_new_function (decl, true);
23953 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
23954
23955 pop_cfun ();
23956
23957 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
23958 DECL_ATTRIBUTES (dispatch_decl)
23959 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
23960
23961 cgraph_node::create_same_body_alias (dispatch_decl, decl);
23962
23963 return decl;
23964 }
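/* Sketch of the result (assumed): for a versioned function foo, the
   dispatcher decl ends up behaving roughly as if declared

     long foo (long) __attribute__ ((ifunc ("foo.resolver")));

   so the dynamic loader runs the comdat resolver once and binds calls to
   foo to whichever version's address it returns.  */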
23965
23966 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
23967 return a pointer to VERSION_DECL if we are running on a machine that
23968 supports the index CLONE_ISA hardware architecture bits. This function will
23969 be called during version dispatch to decide which function version to
23970 execute. It returns the basic block at the end, to which more conditions
23971 can be added. */
23972
23973 static basic_block
23974 add_condition_to_bb (tree function_decl, tree version_decl,
23975 int clone_isa, basic_block new_bb)
23976 {
23977 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
23978
23979 gcc_assert (new_bb != NULL);
23980 gimple_seq gseq = bb_seq (new_bb);
23981
23982
23983 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
23984 build_fold_addr_expr (version_decl));
23985 tree result_var = create_tmp_var (ptr_type_node);
23986 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
23987 gimple *return_stmt = gimple_build_return (result_var);
23988
23989 if (clone_isa == CLONE_DEFAULT)
23990 {
23991 gimple_seq_add_stmt (&gseq, convert_stmt);
23992 gimple_seq_add_stmt (&gseq, return_stmt);
23993 set_bb_seq (new_bb, gseq);
23994 gimple_set_bb (convert_stmt, new_bb);
23995 gimple_set_bb (return_stmt, new_bb);
23996 pop_cfun ();
23997 return new_bb;
23998 }
23999
24000 tree bool_zero = build_int_cst (bool_int_type_node, 0);
24001 tree cond_var = create_tmp_var (bool_int_type_node);
24002 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
24003 const char *arg_str = rs6000_clone_map[clone_isa].name;
24004 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
24005 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
24006 gimple_call_set_lhs (call_cond_stmt, cond_var);
24007
24008 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
24009 gimple_set_bb (call_cond_stmt, new_bb);
24010 gimple_seq_add_stmt (&gseq, call_cond_stmt);
24011
24012 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
24013 NULL_TREE, NULL_TREE);
24014 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
24015 gimple_set_bb (if_else_stmt, new_bb);
24016 gimple_seq_add_stmt (&gseq, if_else_stmt);
24017
24018 gimple_seq_add_stmt (&gseq, convert_stmt);
24019 gimple_seq_add_stmt (&gseq, return_stmt);
24020 set_bb_seq (new_bb, gseq);
24021
24022 basic_block bb1 = new_bb;
24023 edge e12 = split_block (bb1, if_else_stmt);
24024 basic_block bb2 = e12->dest;
24025 e12->flags &= ~EDGE_FALLTHRU;
24026 e12->flags |= EDGE_TRUE_VALUE;
24027
24028 edge e23 = split_block (bb2, return_stmt);
24029 gimple_set_bb (convert_stmt, bb2);
24030 gimple_set_bb (return_stmt, bb2);
24031
24032 basic_block bb3 = e23->dest;
24033 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
24034
24035 remove_edge (e23);
24036 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
24037
24038 pop_cfun ();
24039 return bb3;
24040 }
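/* Illustrative shape of the dispatch logic built by repeated calls to this
   function (pseudo-C; clone and symbol names are assumed):

     void *foo_resolver (void)   // emitted as "foo.resolver"
     {
       if (__builtin_cpu_supports ("arch_3_00"))
         return (void *) foo_power9;
       if (__builtin_cpu_supports ("arch_2_07"))
         return (void *) foo_power8;
       return (void *) foo_default;   // CLONE_DEFAULT, no predicate
     }

   Each non-default clone contributes one __builtin_cpu_supports test,
   emitted from highest priority downwards, with the default version as the
   final fall-through.  */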
24041
24042 /* This function generates the dispatch function for multi-versioned functions.
24043 DISPATCH_DECL is the function which will contain the dispatch logic.
24044 FNDECLS_P holds the function choices for dispatch; it is in fact a vector of decls.
24045 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
24046 code is generated. */
24047
24048 static int
24049 dispatch_function_versions (tree dispatch_decl,
24050 void *fndecls_p,
24051 basic_block *empty_bb)
24052 {
24053 int ix;
24054 tree ele;
24055 vec<tree> *fndecls;
24056 tree clones[CLONE_MAX];
24057
24058 if (TARGET_DEBUG_TARGET)
24059 fputs ("dispatch_function_versions, top\n", stderr);
24060
24061 gcc_assert (dispatch_decl != NULL
24062 && fndecls_p != NULL
24063 && empty_bb != NULL);
24064
24065 /* fndecls_p is actually a vector. */
24066 fndecls = static_cast<vec<tree> *> (fndecls_p);
24067
24068 /* At least one more version other than the default. */
24069 gcc_assert (fndecls->length () >= 2);
24070
24071 /* The first version in the vector is the default decl. */
24072 memset ((void *) clones, '\0', sizeof (clones));
24073 clones[CLONE_DEFAULT] = (*fndecls)[0];
24074
24075 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
24076 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
24077 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
24078 recent glibc. If we ever need to call __builtin_cpu_init, we would need
24079 to insert the code here to do the call. */
24080
24081 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
24082 {
24083 int priority = rs6000_clone_priority (ele);
24084 if (!clones[priority])
24085 clones[priority] = ele;
24086 }
24087
24088 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
24089 if (clones[ix])
24090 {
24091 if (TARGET_DEBUG_TARGET)
24092 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
24093 ix, get_decl_name (clones[ix]));
24094
24095 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
24096 *empty_bb);
24097 }
24098
24099 return 0;
24100 }
24101
24102 /* Generate the dispatching code body to dispatch multi-versioned function
24103 DECL. The target hook is called to process the "target" attributes and
24104 provide the code to dispatch the right function at run-time. NODE points
24105 to the dispatcher decl whose body will be created. */
24106
24107 static tree
24108 rs6000_generate_version_dispatcher_body (void *node_p)
24109 {
24110 tree resolver;
24111 basic_block empty_bb;
24112 struct cgraph_node *node = (cgraph_node *) node_p;
24113 struct cgraph_function_version_info *ninfo = node->function_version ();
24114
24115 if (ninfo->dispatcher_resolver)
24116 return ninfo->dispatcher_resolver;
24117
24118 /* node is going to be an alias, so remove the finalized bit. */
24119 node->definition = false;
24120
24121 /* The first version in the chain corresponds to the default version. */
24122 ninfo->dispatcher_resolver = resolver
24123 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
24124
24125 if (TARGET_DEBUG_TARGET)
24126 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
24127 get_decl_name (resolver));
24128
24129 push_cfun (DECL_STRUCT_FUNCTION (resolver));
24130 auto_vec<tree, 2> fn_ver_vec;
24131
24132 for (struct cgraph_function_version_info *vinfo = ninfo->next;
24133 vinfo;
24134 vinfo = vinfo->next)
24135 {
24136 struct cgraph_node *version = vinfo->this_node;
24137 /* Check for virtual functions here again, as by this time it should
24138 have been determined if this function needs a vtable index or
24139 not. This happens for methods in derived classes that override
24140 virtual methods in base classes but are not explicitly marked as
24141 virtual. */
24142 if (DECL_VINDEX (version->decl))
24143 sorry ("Virtual function multiversioning not supported");
24144
24145 fn_ver_vec.safe_push (version->decl);
24146 }
24147
24148 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
24149 cgraph_edge::rebuild_edges ();
24150 pop_cfun ();
24151 return resolver;
24152 }
24153
24154 \f
24155 /* Hook to determine if one function can safely inline another. */
24156
24157 static bool
24158 rs6000_can_inline_p (tree caller, tree callee)
24159 {
24160 bool ret = false;
24161 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
24162 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
24163
24164 /* If the callee has no option attributes, then it is ok to inline. */
24165 if (!callee_tree)
24166 ret = true;
24167
24168 else
24169 {
24170 HOST_WIDE_INT caller_isa;
24171 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24172 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
24173 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
24174
24175 /* If the caller has option attributes, then use them.
24176 Otherwise, use the command line options. */
24177 if (caller_tree)
24178 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
24179 else
24180 caller_isa = rs6000_isa_flags;
24181
24182 /* The callee's options must be a subset of the caller's options, i.e.
24183 a vsx function may inline an altivec function, but a no-vsx function
24184 must not inline a vsx function. However, for those options that the
24185 callee has explicitly enabled or disabled, then we must enforce that
24186 the callee's and caller's options match exactly; see PR70010. */
24187 if (((caller_isa & callee_isa) == callee_isa)
24188 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
24189 ret = true;
24190 }
24191
24192 if (TARGET_DEBUG_TARGET)
24193 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
24194 get_decl_name (caller), get_decl_name (callee),
24195 (ret ? "can" : "cannot"));
24196
24197 return ret;
24198 }
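/* Illustrative consequence (a sketch, assumed declarations): under the
   subset-plus-explicit rule above,

     __attribute__ ((target ("no-vsx"))) static inline int f (void) { return 0; }
     __attribute__ ((target ("vsx"))) int g (void) { return f (); }

   f's options are a subset of g's, but because f explicitly disabled VSX
   the explicitly-set bits differ, so f is not inlined into g.  */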
24199 \f
24200 /* Allocate a stack temp and fixup the address so it meets the particular
24201 memory requirements (either offsettable or REG+REG addressing). */
24202
24203 rtx
24204 rs6000_allocate_stack_temp (machine_mode mode,
24205 bool offsettable_p,
24206 bool reg_reg_p)
24207 {
24208 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
24209 rtx addr = XEXP (stack, 0);
24210 int strict_p = reload_completed;
24211
24212 if (!legitimate_indirect_address_p (addr, strict_p))
24213 {
24214 if (offsettable_p
24215 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
24216 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24217
24218 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
24219 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24220 }
24221
24222 return stack;
24223 }
24224
24225 /* Given a memory reference, if it is not a reg or reg+reg addressing,
24226 convert to such a form to deal with memory reference instructions
24227 like STFIWX and LDBRX that only take reg+reg addressing. */
24228
24229 rtx
24230 rs6000_force_indexed_or_indirect_mem (rtx x)
24231 {
24232 machine_mode mode = GET_MODE (x);
24233
24234 gcc_assert (MEM_P (x));
24235 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
24236 {
24237 rtx addr = XEXP (x, 0);
24238 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24239 {
24240 rtx reg = XEXP (addr, 0);
24241 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24242 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24243 gcc_assert (REG_P (reg));
24244 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24245 addr = reg;
24246 }
24247 else if (GET_CODE (addr) == PRE_MODIFY)
24248 {
24249 rtx reg = XEXP (addr, 0);
24250 rtx expr = XEXP (addr, 1);
24251 gcc_assert (REG_P (reg));
24252 gcc_assert (GET_CODE (expr) == PLUS);
24253 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24254 addr = reg;
24255 }
24256
24257 if (GET_CODE (addr) == PLUS)
24258 {
24259 rtx op0 = XEXP (addr, 0);
24260 rtx op1 = XEXP (addr, 1);
24261 op0 = force_reg (Pmode, op0);
24262 op1 = force_reg (Pmode, op1);
24263 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24264 }
24265 else
24266 x = replace_equiv_address (x, force_reg (Pmode, addr));
24267 }
24268
24269 return x;
24270 }
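/* Illustrative transformation (assumed RTL): an operand such as

     (mem:SI (plus (reg 3) (const_int 16)))

   is rewritten to

     (mem:SI (plus (reg 3) (reg <fresh pseudo>)))

   by forcing the constant into a register, since STFIWX and LDBRX accept
   only reg or reg+reg addresses.  */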
24271
24272 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24273
24274 On the RS/6000, all integer constants are acceptable, most won't be valid
24275 for particular insns, though. Only easy FP constants are acceptable. */
24276
24277 static bool
24278 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24279 {
24280 if (TARGET_ELF && tls_referenced_p (x))
24281 return false;
24282
24283 if (CONST_DOUBLE_P (x))
24284 return easy_fp_constant (x, mode);
24285
24286 if (GET_CODE (x) == CONST_VECTOR)
24287 return easy_vector_constant (x, mode);
24288
24289 return true;
24290 }
24291
24292 \f
24293 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
24294
24295 static bool
24296 chain_already_loaded (rtx_insn *last)
24297 {
24298 for (; last != NULL; last = PREV_INSN (last))
24299 {
24300 if (NONJUMP_INSN_P (last))
24301 {
24302 rtx patt = PATTERN (last);
24303
24304 if (GET_CODE (patt) == SET)
24305 {
24306 rtx lhs = XEXP (patt, 0);
24307
24308 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
24309 return true;
24310 }
24311 }
24312 }
24313 return false;
24314 }
24315
24316 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
24317
24318 void
24319 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24320 {
24321 rtx func = func_desc;
24322 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
24323 rtx toc_load = NULL_RTX;
24324 rtx toc_restore = NULL_RTX;
24325 rtx func_addr;
24326 rtx abi_reg = NULL_RTX;
24327 rtx call[5];
24328 int n_call;
24329 rtx insn;
24330 bool is_pltseq_longcall;
24331
24332 if (global_tlsarg)
24333 tlsarg = global_tlsarg;
24334
24335 /* Handle longcall attributes. */
24336 is_pltseq_longcall = false;
24337 if ((INTVAL (cookie) & CALL_LONG) != 0
24338 && GET_CODE (func_desc) == SYMBOL_REF)
24339 {
24340 func = rs6000_longcall_ref (func_desc, tlsarg);
24341 if (TARGET_PLTSEQ)
24342 is_pltseq_longcall = true;
24343 }
24344
24345 /* Handle indirect calls. */
24346 if (!SYMBOL_REF_P (func)
24347 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
24348 {
24349 if (!rs6000_pcrel_p (cfun))
24350 {
24351 /* Save the TOC into its reserved slot before the call,
24352 and prepare to restore it after the call. */
24353 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
24354 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
24355 gen_rtvec (1, stack_toc_offset),
24356 UNSPEC_TOCSLOT);
24357 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
24358
24359 /* Can we optimize saving the TOC in the prologue or
24360 do we need to do it at every call? */
24361 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
24362 cfun->machine->save_toc_in_prologue = true;
24363 else
24364 {
24365 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24366 rtx stack_toc_mem = gen_frame_mem (Pmode,
24367 gen_rtx_PLUS (Pmode, stack_ptr,
24368 stack_toc_offset));
24369 MEM_VOLATILE_P (stack_toc_mem) = 1;
24370 if (is_pltseq_longcall)
24371 {
24372 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
24373 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24374 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
24375 }
24376 else
24377 emit_move_insn (stack_toc_mem, toc_reg);
24378 }
24379 }
24380
24381 if (DEFAULT_ABI == ABI_ELFv2)
24382 {
24383 /* A function pointer in the ELFv2 ABI is just a plain address, but
24384 the ABI requires it to be loaded into r12 before the call. */
24385 func_addr = gen_rtx_REG (Pmode, 12);
24386 if (!rtx_equal_p (func_addr, func))
24387 emit_move_insn (func_addr, func);
24388 abi_reg = func_addr;
24389 /* Indirect calls via CTR are strongly preferred over indirect
24390 calls via LR, so move the address there. Needed to mark
24391 this insn for linker plt sequence editing too. */
24392 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24393 if (is_pltseq_longcall)
24394 {
24395 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
24396 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24397 emit_insn (gen_rtx_SET (func_addr, mark_func));
24398 v = gen_rtvec (2, func_addr, func_desc);
24399 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24400 }
24401 else
24402 emit_move_insn (func_addr, abi_reg);
24403 }
24404 else
24405 {
24406 /* A function pointer under AIX is a pointer to a data area whose
24407 first word contains the actual address of the function, whose
24408 second word contains a pointer to its TOC, and whose third word
24409 contains a value to place in the static chain register (r11).
24410 Note that if we load the static chain, our "trampoline" need
24411 not have any executable code. */
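/* Illustrative layout of that data area (a sketch, not a GCC type):

     struct aix_func_desc {
       void *code;           // word 0: actual entry address, loaded below
       void *toc;            // word 1: callee's TOC, loaded before the call
       void *static_chain;   // word 2: value for r11, loaded when needed
     };  */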
24412
24413 /* Load up address of the actual function. */
24414 func = force_reg (Pmode, func);
24415 func_addr = gen_reg_rtx (Pmode);
24416 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
24417
24418 /* Indirect calls via CTR are strongly preferred over indirect
24419 calls via LR, so move the address there. */
24420 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
24421 emit_move_insn (ctr_reg, func_addr);
24422 func_addr = ctr_reg;
24423
24424 /* Prepare to load the TOC of the called function. Note that the
24425 TOC load must happen immediately before the actual call so
24426 that unwinding the TOC registers works correctly. See the
24427 comment in frob_update_context. */
24428 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
24429 rtx func_toc_mem = gen_rtx_MEM (Pmode,
24430 gen_rtx_PLUS (Pmode, func,
24431 func_toc_offset));
24432 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
24433
24434 /* If we have a static chain, load it up. But, if the call was
24435 originally direct, the 3rd word has not been written since no
24436 trampoline has been built, so we ought not to load it, lest we
24437 override a static chain value. */
24438 if (!(GET_CODE (func_desc) == SYMBOL_REF
24439 && SYMBOL_REF_FUNCTION_P (func_desc))
24440 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
24441 && !chain_already_loaded (get_current_sequence ()->next->last))
24442 {
24443 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
24444 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
24445 rtx func_sc_mem = gen_rtx_MEM (Pmode,
24446 gen_rtx_PLUS (Pmode, func,
24447 func_sc_offset));
24448 emit_move_insn (sc_reg, func_sc_mem);
24449 abi_reg = sc_reg;
24450 }
24451 }
24452 }
24453 else
24454 {
24455 /* No TOC register needed for calls from PC-relative callers. */
24456 if (!rs6000_pcrel_p (cfun))
24457 /* Direct calls use the TOC: for local calls, the callee will
24458 assume the TOC register is set; for non-local calls, the
24459 PLT stub needs the TOC register. */
24460 abi_reg = toc_reg;
24461 func_addr = func;
24462 }
24463
24464 /* Create the call. */
24465 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24466 if (value != NULL_RTX)
24467 call[0] = gen_rtx_SET (value, call[0]);
24468 call[1] = gen_rtx_USE (VOIDmode, cookie);
24469 n_call = 2;
24470
24471 if (toc_load)
24472 call[n_call++] = toc_load;
24473 if (toc_restore)
24474 call[n_call++] = toc_restore;
24475
24476 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24477
24478 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
24479 insn = emit_call_insn (insn);
24480
24481 /* Mention all registers defined by the ABI to hold information
24482 as uses in CALL_INSN_FUNCTION_USAGE. */
24483 if (abi_reg)
24484 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24485 }
24486
24487 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
24488
24489 void
24490 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24491 {
24492 rtx call[2];
24493 rtx insn;
24494
24495 gcc_assert (INTVAL (cookie) == 0);
24496
24497 if (global_tlsarg)
24498 tlsarg = global_tlsarg;
24499
24500 /* Create the call. */
24501 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
24502 if (value != NULL_RTX)
24503 call[0] = gen_rtx_SET (value, call[0]);
24504
24505 call[1] = simple_return_rtx;
24506
24507 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
24508 insn = emit_call_insn (insn);
24509
24510 /* Note use of the TOC register. */
24511 if (!rs6000_pcrel_p (cfun))
24512 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
24513 gen_rtx_REG (Pmode, TOC_REGNUM));
24514 }
24515
24516 /* Expand code to perform a call under the SYSV4 ABI. */
24517
24518 void
24519 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24520 {
24521 rtx func = func_desc;
24522 rtx func_addr;
24523 rtx call[4];
24524 rtx insn;
24525 rtx abi_reg = NULL_RTX;
24526 int n;
24527
24528 if (global_tlsarg)
24529 tlsarg = global_tlsarg;
24530
24531 /* Handle longcall attributes. */
24532 if ((INTVAL (cookie) & CALL_LONG) != 0
24533 && GET_CODE (func_desc) == SYMBOL_REF)
24534 {
24535 func = rs6000_longcall_ref (func_desc, tlsarg);
24536 /* If the longcall was implemented as an inline PLT call using
24537 PLT unspecs then func will be REG:r11. If not, func will be
24538 a pseudo reg. The inline PLT call sequence supports lazy
24539 linking (and longcalls to functions in dlopen'd libraries).
24540 The other style of longcalls don't. The lazy linking entry
24541 to the dynamic symbol resolver requires r11 be the function
24542 address (as it is for linker generated PLT stubs). Ensure
24543 r11 stays valid to the bctrl by marking r11 used by the call. */
24544 if (TARGET_PLTSEQ)
24545 abi_reg = func;
24546 }
24547
24548 /* Handle indirect calls. */
24549 if (GET_CODE (func) != SYMBOL_REF)
24550 {
24551 func = force_reg (Pmode, func);
24552
24553 /* Indirect calls via CTR are strongly preferred over indirect
24554 calls via LR, so move the address there. That can't be left
24555 to reload because we want to mark every instruction in an
24556 inline PLT call sequence with a reloc, enabling the linker to
24557 edit the sequence back to a direct call when that makes sense. */
24558 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24559 if (abi_reg)
24560 {
24561 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24562 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24563 emit_insn (gen_rtx_SET (func_addr, mark_func));
24564 v = gen_rtvec (2, func_addr, func_desc);
24565 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24566 }
24567 else
24568 emit_move_insn (func_addr, func);
24569 }
24570 else
24571 func_addr = func;
24572
24573 /* Create the call. */
24574 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24575 if (value != NULL_RTX)
24576 call[0] = gen_rtx_SET (value, call[0]);
24577
24578 call[1] = gen_rtx_USE (VOIDmode, cookie);
24579 n = 2;
24580 if (TARGET_SECURE_PLT
24581 && flag_pic
24582 && GET_CODE (func_addr) == SYMBOL_REF
24583 && !SYMBOL_REF_LOCAL_P (func_addr))
24584 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
24585
24586 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24587
24588 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
24589 insn = emit_call_insn (insn);
24590 if (abi_reg)
24591 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24592 }
24593
24594 /* Expand code to perform a sibling call under the SysV4 ABI. */
24595
24596 void
24597 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24598 {
24599 rtx func = func_desc;
24600 rtx func_addr;
24601 rtx call[3];
24602 rtx insn;
24603 rtx abi_reg = NULL_RTX;
24604
24605 if (global_tlsarg)
24606 tlsarg = global_tlsarg;
24607
24608 /* Handle longcall attributes. */
24609 if ((INTVAL (cookie) & CALL_LONG) != 0
24610 && GET_CODE (func_desc) == SYMBOL_REF)
24611 {
24612 func = rs6000_longcall_ref (func_desc, tlsarg);
24613 /* If the longcall was implemented as an inline PLT call using
24614 PLT unspecs then func will be REG:r11. If not, func will be
24615 a pseudo reg. The inline PLT call sequence supports lazy
24616 linking (and longcalls to functions in dlopen'd libraries).
24617 The other style of longcalls don't. The lazy linking entry
24618 to the dynamic symbol resolver requires r11 be the function
24619 address (as it is for linker generated PLT stubs). Ensure
24620 r11 stays valid to the bctr by marking r11 used by the call. */
24621 if (TARGET_PLTSEQ)
24622 abi_reg = func;
24623 }
24624
24625 /* Handle indirect calls. */
24626 if (GET_CODE (func) != SYMBOL_REF)
24627 {
24628 func = force_reg (Pmode, func);
24629
24630 /* Indirect sibcalls must go via CTR. That can't be left to
24631 reload because we want to mark every instruction in an inline
24632 PLT call sequence with a reloc, enabling the linker to edit
24633 the sequence back to a direct call when that makes sense. */
24634 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24635 if (abi_reg)
24636 {
24637 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24638 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24639 emit_insn (gen_rtx_SET (func_addr, mark_func));
24640 v = gen_rtvec (2, func_addr, func_desc);
24641 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24642 }
24643 else
24644 emit_move_insn (func_addr, func);
24645 }
24646 else
24647 func_addr = func;
24648
24649 /* Create the call. */
24650 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24651 if (value != NULL_RTX)
24652 call[0] = gen_rtx_SET (value, call[0]);
24653
24654 call[1] = gen_rtx_USE (VOIDmode, cookie);
24655 call[2] = simple_return_rtx;
24656
24657 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24658 insn = emit_call_insn (insn);
24659 if (abi_reg)
24660 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24661 }
24662
24663 #if TARGET_MACHO
24664
24665 /* Expand code to perform a call under the Darwin ABI.
24666 Modulo handling of mlongcall, this is much the same as sysv.
24667 If/when the longcall optimisation is removed, we could drop this
24668 code and use the sysv case (taking care to avoid the tls stuff).
24669
24670 We can use this for sibcalls too, if needed. */
24671
24672 void
24673 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
24674 rtx cookie, bool sibcall)
24675 {
24676 rtx func = func_desc;
24677 rtx func_addr;
24678 rtx call[3];
24679 rtx insn;
24680 int cookie_val = INTVAL (cookie);
24681 bool make_island = false;
24682
24683 /* Handle longcall attributes, there are two cases for Darwin:
24684 1) Newer linkers are capable of synthesising any branch islands needed.
24685 2) We need a helper branch island synthesised by the compiler.
24686 The second case has mostly been retired and we don't use it for m64.
24687 In fact, it is an optimisation; we could just indirect as sysv does,
24688 but we keep the island scheme for backwards compatibility for now.
24689 If we're going to use this, then we need to keep the CALL_LONG bit set,
24690 so that we can pick up the special insn form later. */
24691 if ((cookie_val & CALL_LONG) != 0
24692 && GET_CODE (func_desc) == SYMBOL_REF)
24693 {
24694 /* FIXME: the longcall opt should not hang off this flag; it is most
24695 likely incorrect for kernel-mode code-generation. */
24696 if (darwin_symbol_stubs && TARGET_32BIT)
24697 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
24698 else
24699 {
24700 /* The linker is capable of doing this, but the user explicitly
24701 asked for -mlongcall, so we'll do the 'normal' version. */
24702 func = rs6000_longcall_ref (func_desc, NULL_RTX);
24703 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
24704 }
24705 }
24706
24707 /* Handle indirect calls. */
24708 if (GET_CODE (func) != SYMBOL_REF)
24709 {
24710 func = force_reg (Pmode, func);
24711
24712 /* Indirect calls via CTR are strongly preferred over indirect
24713 calls via LR, and are required for indirect sibcalls, so move
24714 the address there. */
24715 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24716 emit_move_insn (func_addr, func);
24717 }
24718 else
24719 func_addr = func;
24720
24721 /* Create the call. */
24722 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24723 if (value != NULL_RTX)
24724 call[0] = gen_rtx_SET (value, call[0]);
24725
24726 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
24727
24728 if (sibcall)
24729 call[2] = simple_return_rtx;
24730 else
24731 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24732
24733 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24734 insn = emit_call_insn (insn);
24735 /* Now we have the debug info in the insn, we can set up the branch island
24736 if we're using one. */
24737 if (make_island)
24738 {
24739 tree funname = get_identifier (XSTR (func_desc, 0));
24740
24741 if (no_previous_def (funname))
24742 {
24743 rtx label_rtx = gen_label_rtx ();
24744 char *label_buf, temp_buf[256];
24745 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
24746 CODE_LABEL_NUMBER (label_rtx));
24747 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
24748 tree labelname = get_identifier (label_buf);
24749 add_compiler_branch_island (labelname, funname,
24750 insn_line ((const rtx_insn*)insn));
24751 }
24752 }
24753 }
24754 #endif
24755
24756 void
24757 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24758 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24759 {
24760 #if TARGET_MACHO
24761 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
24762 #else
24763 gcc_unreachable();
24764 #endif
24765 }
24766
24767
24768 void
24769 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24770 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24771 {
24772 #if TARGET_MACHO
24773 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
24774 #else
24775 gcc_unreachable();
24776 #endif
24777 }
24778
24779 /* Return whether we should generate PC-relative code for FNDECL. */
24780 bool
24781 rs6000_fndecl_pcrel_p (const_tree fndecl)
24782 {
24783 if (DEFAULT_ABI != ABI_ELFv2)
24784 return false;
24785
24786 struct cl_target_option *opts = target_opts_for_fn (fndecl);
24787
24788 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24789 && TARGET_CMODEL == CMODEL_MEDIUM);
24790 }
24791
24792 /* Return whether we should generate PC-relative code for *FN. */
24793 bool
24794 rs6000_pcrel_p (struct function *fn)
24795 {
24796 if (DEFAULT_ABI != ABI_ELFv2)
24797 return false;
24798
24799 /* Optimize usual case. */
24800 if (fn == cfun)
24801 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24802 && TARGET_CMODEL == CMODEL_MEDIUM);
24803
24804 return rs6000_fndecl_pcrel_p (fn->decl);
24805 }
24806
24807 \f
24808 /* Given an address (ADDR), a mode (MODE), and what the format of the
24809 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
24810 for the address. */
24811
24812 enum insn_form
24813 address_to_insn_form (rtx addr,
24814 machine_mode mode,
24815 enum non_prefixed_form non_prefixed_format)
24816 {
24817 /* Single register is easy. */
24818 if (REG_P (addr) || SUBREG_P (addr))
24819 return INSN_FORM_BASE_REG;
24820
24821 /* If the non-prefixed instruction format doesn't support offset addressing,
24822 make sure only indexed addressing is allowed.
24823
24824 We special case SDmode so that the register allocator does not try to move
24825 SDmode through GPR registers, but instead uses the 32-bit integer load and
24826 store instructions for the floating point registers. */
24827 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
24828 {
24829 if (GET_CODE (addr) != PLUS)
24830 return INSN_FORM_BAD;
24831
24832 rtx op0 = XEXP (addr, 0);
24833 rtx op1 = XEXP (addr, 1);
24834 if (!REG_P (op0) && !SUBREG_P (op0))
24835 return INSN_FORM_BAD;
24836
24837 if (!REG_P (op1) && !SUBREG_P (op1))
24838 return INSN_FORM_BAD;
24839
24840 return INSN_FORM_X;
24841 }
24842
24843 /* Deal with update forms. */
24844 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
24845 return INSN_FORM_UPDATE;
24846
24847 /* Handle PC-relative symbols and labels. Check for both local and
24848 external symbols. Assume labels are always local. TLS symbols
24849 are not PC-relative for rs6000. */
24850 if (TARGET_PCREL)
24851 {
24852 if (LABEL_REF_P (addr))
24853 return INSN_FORM_PCREL_LOCAL;
24854
24855 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
24856 {
24857 if (!SYMBOL_REF_LOCAL_P (addr))
24858 return INSN_FORM_PCREL_EXTERNAL;
24859 else
24860 return INSN_FORM_PCREL_LOCAL;
24861 }
24862 }
24863
24864 if (GET_CODE (addr) == CONST)
24865 addr = XEXP (addr, 0);
24866
24867 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
24868 if (GET_CODE (addr) == LO_SUM)
24869 return INSN_FORM_LO_SUM;
24870
24871 /* Everything below must be an offset address of some form. */
24872 if (GET_CODE (addr) != PLUS)
24873 return INSN_FORM_BAD;
24874
24875 rtx op0 = XEXP (addr, 0);
24876 rtx op1 = XEXP (addr, 1);
24877
24878 /* Check for indexed addresses. */
24879 if (REG_P (op1) || SUBREG_P (op1))
24880 {
24881 if (REG_P (op0) || SUBREG_P (op0))
24882 return INSN_FORM_X;
24883
24884 return INSN_FORM_BAD;
24885 }
24886
24887 if (!CONST_INT_P (op1))
24888 return INSN_FORM_BAD;
24889
24890 HOST_WIDE_INT offset = INTVAL (op1);
24891 if (!SIGNED_INTEGER_34BIT_P (offset))
24892 return INSN_FORM_BAD;
24893
24894 /* Check for local and external PC-relative addresses. Labels are always
24895 local. TLS symbols are not PC-relative for rs6000. */
24896 if (TARGET_PCREL)
24897 {
24898 if (LABEL_REF_P (op0))
24899 return INSN_FORM_PCREL_LOCAL;
24900
24901 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
24902 {
24903 if (!SYMBOL_REF_LOCAL_P (op0))
24904 return INSN_FORM_PCREL_EXTERNAL;
24905 else
24906 return INSN_FORM_PCREL_LOCAL;
24907 }
24908 }
24909
24910 /* If it isn't PC-relative, the address must use a base register. */
24911 if (!REG_P (op0) && !SUBREG_P (op0))
24912 return INSN_FORM_BAD;
24913
24914 /* Large offsets must be prefixed. */
24915 if (!SIGNED_INTEGER_16BIT_P (offset))
24916 {
24917 if (TARGET_PREFIXED)
24918 return INSN_FORM_PREFIXED_NUMERIC;
24919
24920 return INSN_FORM_BAD;
24921 }
24922
24923 /* We have a 16-bit offset, see what default instruction format to use. */
24924 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
24925 {
24926 unsigned size = GET_MODE_SIZE (mode);
24927
24928 /* On 64-bit systems, assume 64-bit integers need to use DS form
24929 addresses (for LD/STD). VSX vectors need to use DQ form addresses
24930 (for LXV and STXV). TImode is problematical in that its normal usage
24931 is expected to be GPRs where it wants a DS instruction format, but if
24932 it goes into the vector registers, it wants a DQ instruction
24933 format. */
24934 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
24935 non_prefixed_format = NON_PREFIXED_DS;
24936
24937 else if (TARGET_VSX && size >= 16
24938 && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
24939 non_prefixed_format = NON_PREFIXED_DQ;
24940
24941 else
24942 non_prefixed_format = NON_PREFIXED_D;
24943 }
24944
24945 /* Classify the D/DS/DQ-form addresses. */
24946 switch (non_prefixed_format)
24947 {
24948 /* Instruction format D, all 16 bits are valid. */
24949 case NON_PREFIXED_D:
24950 return INSN_FORM_D;
24951
24952 /* Instruction format DS, bottom 2 bits must be 0. */
24953 case NON_PREFIXED_DS:
24954 if ((offset & 3) == 0)
24955 return INSN_FORM_DS;
24956
24957 else if (TARGET_PREFIXED)
24958 return INSN_FORM_PREFIXED_NUMERIC;
24959
24960 else
24961 return INSN_FORM_BAD;
24962
24963 /* Instruction format DQ, bottom 4 bits must be 0. */
24964 case NON_PREFIXED_DQ:
24965 if ((offset & 15) == 0)
24966 return INSN_FORM_DQ;
24967
24968 else if (TARGET_PREFIXED)
24969 return INSN_FORM_PREFIXED_NUMERIC;
24970
24971 else
24972 return INSN_FORM_BAD;
24973
24974 default:
24975 break;
24976 }
24977
24978 return INSN_FORM_BAD;
24979 }
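/* Illustrative classifications (assumed example addresses, default
   non-prefixed format, offsets in range):

     (reg 3)                            -> INSN_FORM_BASE_REG
     (plus (reg 3) (reg 4))             -> INSN_FORM_X
     (plus (reg 3) (const_int 8))       -> INSN_FORM_D, _DS or _DQ by mode
     (plus (reg 3) (const_int 100000))  -> INSN_FORM_PREFIXED_NUMERIC with
                                           -mprefixed, else INSN_FORM_BAD
     (symbol_ref "x") with -mpcrel      -> INSN_FORM_PCREL_LOCAL or _EXTERNAL  */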
24980
24981 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
24982 instruction format (D/DS/DQ) used for offset memory. */
24983
24984 static enum non_prefixed_form
24985 reg_to_non_prefixed (rtx reg, machine_mode mode)
24986 {
24987 /* If it isn't a register, use the defaults. */
24988 if (!REG_P (reg) && !SUBREG_P (reg))
24989 return NON_PREFIXED_DEFAULT;
24990
24991 unsigned int r = reg_or_subregno (reg);
24992
24993 /* If we have a pseudo, use the default instruction format. */
24994 if (!HARD_REGISTER_NUM_P (r))
24995 return NON_PREFIXED_DEFAULT;
24996
24997 unsigned size = GET_MODE_SIZE (mode);
24998
24999 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
25000 128-bit floating point, and 128-bit integers. Before power9, only indexed
25001 addressing was available for vectors. */
25002 if (FP_REGNO_P (r))
25003 {
25004 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25005 return NON_PREFIXED_D;
25006
25007 else if (size < 8)
25008 return NON_PREFIXED_X;
25009
25010 else if (TARGET_VSX && size >= 16
25011 && (VECTOR_MODE_P (mode)
25012 || FLOAT128_VECTOR_P (mode)
25013 || mode == TImode || mode == CTImode))
25014 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
25015
25016 else
25017 return NON_PREFIXED_DEFAULT;
25018 }
25019
25020 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
25021 128-bit floating point, and 128-bit integers. Before power9, only indexed
25022 addressing was available. */
25023 else if (ALTIVEC_REGNO_P (r))
25024 {
25025 if (!TARGET_P9_VECTOR)
25026 return NON_PREFIXED_X;
25027
25028 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25029 return NON_PREFIXED_DS;
25030
25031 else if (size < 8)
25032 return NON_PREFIXED_X;
25033
25034 else if (TARGET_VSX && size >= 16
25035 && (VECTOR_MODE_P (mode)
25036 || FLOAT128_VECTOR_P (mode)
25037 || mode == TImode || mode == CTImode))
25038 return NON_PREFIXED_DQ;
25039
25040 else
25041 return NON_PREFIXED_DEFAULT;
25042 }
25043
25044 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
25045 otherwise. Assume that any other register, such as LR, CRs, etc. will go
25046 through the GPR registers for memory operations. */
25047 else if (TARGET_POWERPC64 && size >= 8)
25048 return NON_PREFIXED_DS;
25049
25050 return NON_PREFIXED_D;
25051 }
25052
25053 \f
25054 /* Whether a load instruction is a prefixed instruction. This is called from
25055 the prefixed attribute processing. */
25056
25057 bool
25058 prefixed_load_p (rtx_insn *insn)
25059 {
25060 /* Validate the insn to make sure it is a normal load insn. */
25061 extract_insn_cached (insn);
25062 if (recog_data.n_operands < 2)
25063 return false;
25064
25065 rtx reg = recog_data.operand[0];
25066 rtx mem = recog_data.operand[1];
25067
25068 if (!REG_P (reg) && !SUBREG_P (reg))
25069 return false;
25070
25071 if (!MEM_P (mem))
25072 return false;
25073
25074 /* Prefixed load instructions do not support update or indexed forms. */
25075 if (get_attr_indexed (insn) == INDEXED_YES
25076 || get_attr_update (insn) == UPDATE_YES)
25077 return false;
25078
25079 /* LWA uses the DS format instead of the D format that LWZ uses. */
25080 enum non_prefixed_form non_prefixed;
25081 machine_mode reg_mode = GET_MODE (reg);
25082 machine_mode mem_mode = GET_MODE (mem);
25083
25084 if (mem_mode == SImode && reg_mode == DImode
25085 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
25086 non_prefixed = NON_PREFIXED_DS;
25087
25088 else
25089 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25090
25091 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25092 }
25093
25094 /* Whether a store instruction is a prefixed instruction. This is called from
25095 the prefixed attribute processing. */
25096
25097 bool
25098 prefixed_store_p (rtx_insn *insn)
25099 {
25100 /* Validate the insn to make sure it is a normal store insn. */
25101 extract_insn_cached (insn);
25102 if (recog_data.n_operands < 2)
25103 return false;
25104
25105 rtx mem = recog_data.operand[0];
25106 rtx reg = recog_data.operand[1];
25107
25108 if (!REG_P (reg) && !SUBREG_P (reg))
25109 return false;
25110
25111 if (!MEM_P (mem))
25112 return false;
25113
25114 /* Prefixed store instructions do not support update or indexed forms. */
25115 if (get_attr_indexed (insn) == INDEXED_YES
25116 || get_attr_update (insn) == UPDATE_YES)
25117 return false;
25118
25119 machine_mode mem_mode = GET_MODE (mem);
25120 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25121 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25122 }
25123
25124 /* Whether a load immediate or add instruction is a prefixed instruction. This
25125 is called from the prefixed attribute processing. */
25126
25127 bool
25128 prefixed_paddi_p (rtx_insn *insn)
25129 {
25130 rtx set = single_set (insn);
25131 if (!set)
25132 return false;
25133
25134 rtx dest = SET_DEST (set);
25135 rtx src = SET_SRC (set);
25136
25137 if (!REG_P (dest) && !SUBREG_P (dest))
25138 return false;
25139
25140 /* Is this a load immediate that can't be done with a simple ADDI or
25141 ADDIS? */
25142 if (CONST_INT_P (src))
25143 return (satisfies_constraint_eI (src)
25144 && !satisfies_constraint_I (src)
25145 && !satisfies_constraint_L (src));
25146
25147 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25148 ADDIS? */
25149 if (GET_CODE (src) == PLUS)
25150 {
25151 rtx op1 = XEXP (src, 1);
25152
25153 return (CONST_INT_P (op1)
25154 && satisfies_constraint_eI (op1)
25155 && !satisfies_constraint_I (op1)
25156 && !satisfies_constraint_L (op1));
25157 }
25158
25159 /* If not, is it a load of a PC-relative address? */
25160 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25161 return false;
25162
25163 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25164 return false;
25165
25166 enum insn_form iform = address_to_insn_form (src, Pmode,
25167 NON_PREFIXED_DEFAULT);
25168
25169 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
25170 }
25171
25172 /* Whether the next instruction needs a 'p' prefix issued before the
25173 instruction is printed out. */
25174 static bool next_insn_prefixed_p;
25175
25176 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25177 outputting the assembler code. On the PowerPC, we remember if the current
25178 insn is a prefixed insn where we need to emit a 'p' before the insn.
25179
25180 In addition, if the insn is part of a PC-relative reference to an external
25181 label optimization, this is also recorded. */
25182 void
25183 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25184 {
25185 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25186 return;
25187 }
25188
25189 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25190 We use it to emit a 'p' for prefixed insns, based on the flag set in
25191 FINAL_PRESCAN_INSN. */
25192 void
25193 rs6000_asm_output_opcode (FILE *stream)
25194 {
25195 if (next_insn_prefixed_p)
25196 fprintf (stream, "p");
25197
25198 return;
25199 }
25200
25201 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
25202 should be adjusted to reflect any required changes. This macro is used when
25203 there is some systematic length adjustment required that would be difficult
25204 to express in the length attribute.
25205
25206 On the PowerPC, we use this to adjust the length of an instruction if one or
25207 more prefixed instructions are generated, using the attribute
25208 max_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
25209 hardware requires that a prefixed instruction does not cross a 64-byte
25210 boundary. This means the compiler has to assume the length of the first
25211 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
25212 already set for the non-prefixed instruction, we just need to update for the
25213 difference. */
25214
25215 int
25216 rs6000_adjust_insn_length (rtx_insn *insn, int length)
25217 {
25218 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
25219 {
25220 rtx pattern = PATTERN (insn);
25221 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
25222 && get_attr_prefixed (insn) == PREFIXED_YES)
25223 {
25224 int num_prefixed = get_attr_max_prefixed_insns (insn);
25225 length += 4 * (num_prefixed + 1);
25226 }
25227 }
25228
25229 return length;
25230 }
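
/* Worked example (illustrative): for an insn whose pattern emits a single
   prefixed instruction, max_prefixed_insns is 1 and the base length was
   counted as 4 bytes, so we add 4 * (1 + 1) = 8 bytes: 4 for the prefix
   word itself and 4 for the worst-case alignment NOP that keeps the 8-byte
   instruction from crossing a 64-byte boundary, giving the 12-byte
   estimate described above.  */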
25231
25232 \f
25233 #ifdef HAVE_GAS_HIDDEN
25234 # define USE_HIDDEN_LINKONCE 1
25235 #else
25236 # define USE_HIDDEN_LINKONCE 0
25237 #endif
25238
25239 /* Fills in the label name that should be used for a 476 link stack thunk. */
25240
25241 void
25242 get_ppc476_thunk_name (char name[32])
25243 {
25244 gcc_assert (TARGET_LINK_STACK);
25245
25246 if (USE_HIDDEN_LINKONCE)
25247 sprintf (name, "__ppc476.get_thunk");
25248 else
25249 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
25250 }
25251
25252 /* This function emits the simple thunk routine that is used to preserve
25253 the link stack on the 476 cpu. */
25254
25255 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
25256 static void
25257 rs6000_code_end (void)
25258 {
25259 char name[32];
25260 tree decl;
25261
25262 if (!TARGET_LINK_STACK)
25263 return;
25264
25265 get_ppc476_thunk_name (name);
25266
25267 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
25268 build_function_type_list (void_type_node, NULL_TREE));
25269 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
25270 NULL_TREE, void_type_node);
25271 TREE_PUBLIC (decl) = 1;
25272 TREE_STATIC (decl) = 1;
25273
25274 #if RS6000_WEAK
25275 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
25276 {
25277 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
25278 targetm.asm_out.unique_section (decl, 0);
25279 switch_to_section (get_named_section (decl, NULL, 0));
25280 DECL_WEAK (decl) = 1;
25281 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
25282 targetm.asm_out.globalize_label (asm_out_file, name);
25283 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
25284 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
25285 }
25286 else
25287 #endif
25288 {
25289 switch_to_section (text_section);
25290 ASM_OUTPUT_LABEL (asm_out_file, name);
25291 }
25292
25293 DECL_INITIAL (decl) = make_node (BLOCK);
25294 current_function_decl = decl;
25295 allocate_struct_function (decl, false);
25296 init_function_start (decl);
25297 first_function_block_is_cold = false;
25298 /* Make sure unwind info is emitted for the thunk if needed. */
25299 final_start_function (emit_barrier (), asm_out_file, 1);
25300
25301 fputs ("\tblr\n", asm_out_file);
25302
25303 final_end_function ();
25304 init_insn_lengths ();
25305 free_after_compilation (cfun);
25306 set_cfun (NULL);
25307 current_function_decl = NULL;
25308 }
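
/* The emitted thunk body is just a return, roughly:

	__ppc476.get_thunk:
		blr

   A caller does "bl __ppc476.get_thunk" followed by an mflr to read its
   own address; the matched bl/blr pair keeps the 476's branch link stack
   balanced, unlike the usual "bl .+4" idiom.  */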
25309
25310 /* Add r30 to hard reg set if the prologue sets it up and it is not
25311 pic_offset_table_rtx. */
25312
25313 static void
25314 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
25315 {
25316 if (!TARGET_SINGLE_PIC_BASE
25317 && TARGET_TOC
25318 && TARGET_MINIMAL_TOC
25319 && !constant_pool_empty_p ())
25320 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25321 if (cfun->machine->split_stack_argp_used)
25322 add_to_hard_reg_set (&set->set, Pmode, 12);
25323
25324 /* Make sure the hard reg set doesn't include r2, which was possibly added
25325 via PIC_OFFSET_TABLE_REGNUM. */
25326 if (TARGET_TOC)
25327 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
25328 }
25329
25330 \f
25331 /* Helper function for rs6000_split_logical to emit a logical instruction after
25332 splitting the operation into individual GPR registers.
25333
25334 DEST is the destination register.
25335 OP1 and OP2 are the input source registers.
25336 CODE is the base operation (AND, IOR, XOR, NOT).
25337 MODE is the machine mode.
25338 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25339 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25340 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25341
25342 static void
25343 rs6000_split_logical_inner (rtx dest,
25344 rtx op1,
25345 rtx op2,
25346 enum rtx_code code,
25347 machine_mode mode,
25348 bool complement_final_p,
25349 bool complement_op1_p,
25350 bool complement_op2_p)
25351 {
25352 rtx bool_rtx;
25353
25354 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
25355 if (op2 && CONST_INT_P (op2)
25356 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
25357 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25358 {
25359 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
25360 HOST_WIDE_INT value = INTVAL (op2) & mask;
25361
25362 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
25363 if (code == AND)
25364 {
25365 if (value == 0)
25366 {
25367 emit_insn (gen_rtx_SET (dest, const0_rtx));
25368 return;
25369 }
25370
25371 else if (value == mask)
25372 {
25373 if (!rtx_equal_p (dest, op1))
25374 emit_insn (gen_rtx_SET (dest, op1));
25375 return;
25376 }
25377 }
25378
25379 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
25380 into separate ORI/ORIS or XORI/XORIS instructions. */
25381 else if (code == IOR || code == XOR)
25382 {
25383 if (value == 0)
25384 {
25385 if (!rtx_equal_p (dest, op1))
25386 emit_insn (gen_rtx_SET (dest, op1));
25387 return;
25388 }
25389 }
25390 }
25391
25392 if (code == AND && mode == SImode
25393 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25394 {
25395 emit_insn (gen_andsi3 (dest, op1, op2));
25396 return;
25397 }
25398
25399 if (complement_op1_p)
25400 op1 = gen_rtx_NOT (mode, op1);
25401
25402 if (complement_op2_p)
25403 op2 = gen_rtx_NOT (mode, op2);
25404
25405 /* For canonical RTL, if only one arm is inverted it is the first. */
25406 if (!complement_op1_p && complement_op2_p)
25407 std::swap (op1, op2);
25408
25409 bool_rtx = ((code == NOT)
25410 ? gen_rtx_NOT (mode, op1)
25411 : gen_rtx_fmt_ee (code, mode, op1, op2));
25412
25413 if (complement_final_p)
25414 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
25415
25416 emit_insn (gen_rtx_SET (dest, bool_rtx));
25417 }
25418
25419 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
25420 operations are split immediately during RTL generation to allow for more
25421 optimizations of the AND/IOR/XOR.
25422
25423 OPERANDS is an array containing the destination and two input operands.
25424 CODE is the base operation (AND, IOR, XOR, NOT).
25425 MODE is the machine mode.
25426 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25427 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25428 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
25429 CLOBBER_REG is either NULL or a scratch register of type CC to allow
25430 formation of the AND instructions. */
25431
25432 static void
25433 rs6000_split_logical_di (rtx operands[3],
25434 enum rtx_code code,
25435 bool complement_final_p,
25436 bool complement_op1_p,
25437 bool complement_op2_p)
25438 {
25439 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
25440 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
25441 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
25442 enum hi_lo { hi = 0, lo = 1 };
25443 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
25444 size_t i;
25445
25446 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
25447 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
25448 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
25449 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
25450
25451 if (code == NOT)
25452 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
25453 else
25454 {
25455 if (!CONST_INT_P (operands[2]))
25456 {
25457 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
25458 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
25459 }
25460 else
25461 {
25462 HOST_WIDE_INT value = INTVAL (operands[2]);
25463 HOST_WIDE_INT value_hi_lo[2];
25464
25465 gcc_assert (!complement_final_p);
25466 gcc_assert (!complement_op1_p);
25467 gcc_assert (!complement_op2_p);
25468
25469 value_hi_lo[hi] = value >> 32;
25470 value_hi_lo[lo] = value & lower_32bits;
25471
25472 for (i = 0; i < 2; i++)
25473 {
25474 HOST_WIDE_INT sub_value = value_hi_lo[i];
25475
25476 if (sub_value & sign_bit)
25477 sub_value |= upper_32bits;
25478
25479 op2_hi_lo[i] = GEN_INT (sub_value);
25480
25481 /* If this is an AND instruction, check to see if we need to load
25482 the value in a register. */
25483 if (code == AND && sub_value != -1 && sub_value != 0
25484 && !and_operand (op2_hi_lo[i], SImode))
25485 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
25486 }
25487 }
25488 }
25489
25490 for (i = 0; i < 2; i++)
25491 {
25492 /* Split large IOR/XOR operations. */
25493 if ((code == IOR || code == XOR)
25494 && CONST_INT_P (op2_hi_lo[i])
25495 && !complement_final_p
25496 && !complement_op1_p
25497 && !complement_op2_p
25498 && !logical_const_operand (op2_hi_lo[i], SImode))
25499 {
25500 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
25501 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
25502 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
25503 rtx tmp = gen_reg_rtx (SImode);
25504
25505 /* Make sure the constant is sign extended. */
25506 if ((hi_16bits & sign_bit) != 0)
25507 hi_16bits |= upper_32bits;
25508
25509 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
25510 code, SImode, false, false, false);
25511
25512 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
25513 code, SImode, false, false, false);
25514 }
25515 else
25516 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
25517 code, SImode, complement_final_p,
25518 complement_op1_p, complement_op2_p);
25519 }
25520
25521 return;
25522 }
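
/* Sketch of the splitting (illustrative): on a 32-bit target, the 64-bit
   operation "a |= 0x12345678" is done on the two SImode halves separately.
   The high half ORs with 0 and degenerates to a move, while the low half's
   constant cannot be a single ORI/ORIS immediate and is split further:

	oris 9,9,0x1234		# IOR with the upper 16 bits
	ori  9,9,0x5678		# IOR with the lower 16 bits

   where r9 stands for whatever register holds the low word.  */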
25523
25524 /* Split the insns that make up boolean operations operating on multiple GPR
25525 registers. The boolean MD patterns ensure that the inputs either are
25526 exactly the same as the output registers, or there is no overlap.
25527
25528 OPERANDS is an array containing the destination and two input operands.
25529 CODE is the base operation (AND, IOR, XOR, NOT).
25530 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25531 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25532 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25533
25534 void
25535 rs6000_split_logical (rtx operands[3],
25536 enum rtx_code code,
25537 bool complement_final_p,
25538 bool complement_op1_p,
25539 bool complement_op2_p)
25540 {
25541 machine_mode mode = GET_MODE (operands[0]);
25542 machine_mode sub_mode;
25543 rtx op0, op1, op2;
25544 int sub_size, regno0, regno1, nregs, i;
25545
25546 /* If this is DImode, use the specialized version that can run before
25547 register allocation. */
25548 if (mode == DImode && !TARGET_POWERPC64)
25549 {
25550 rs6000_split_logical_di (operands, code, complement_final_p,
25551 complement_op1_p, complement_op2_p);
25552 return;
25553 }
25554
25555 op0 = operands[0];
25556 op1 = operands[1];
25557 op2 = (code == NOT) ? NULL_RTX : operands[2];
25558 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
25559 sub_size = GET_MODE_SIZE (sub_mode);
25560 regno0 = REGNO (op0);
25561 regno1 = REGNO (op1);
25562
25563 gcc_assert (reload_completed);
25564 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25565 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25566
25567 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
25568 gcc_assert (nregs > 1);
25569
25570 if (op2 && REG_P (op2))
25571 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
25572
25573 for (i = 0; i < nregs; i++)
25574 {
25575 int offset = i * sub_size;
25576 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
25577 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
25578 rtx sub_op2 = ((code == NOT)
25579 ? NULL_RTX
25580 : simplify_subreg (sub_mode, op2, mode, offset));
25581
25582 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
25583 complement_final_p, complement_op1_p,
25584 complement_op2_p);
25585 }
25586
25587 return;
25588 }
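
/* For instance (illustrative): after reload, a TImode XOR on a 64-bit
   target arrives here and is split into two DImode XORs, one per GPR of
   the register pair.  */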
25589
25590 \f
25591 /* Return true if the peephole2 can combine a load involving a combination of
25592 an addis instruction and a load with an offset that can be fused together on
25593 a power8. */
25594
25595 bool
25596 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
25597 rtx addis_value, /* addis value. */
25598 rtx target, /* target register that is loaded. */
25599 rtx mem) /* bottom part of the memory addr. */
25600 {
25601 rtx addr;
25602 rtx base_reg;
25603
25604 /* Validate arguments. */
25605 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
25606 return false;
25607
25608 if (!base_reg_operand (target, GET_MODE (target)))
25609 return false;
25610
25611 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
25612 return false;
25613
25614 /* Allow sign/zero extension. */
25615 if (GET_CODE (mem) == ZERO_EXTEND
25616 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
25617 mem = XEXP (mem, 0);
25618
25619 if (!MEM_P (mem))
25620 return false;
25621
25622 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
25623 return false;
25624
25625 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
25626 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
25627 return false;
25628
25629 /* Validate that the register used to load the high value is either the
25630 register being loaded, or we can safely replace its use.
25631
25632 This function is only called from the peephole2 pass and we assume that
25633 there are 2 instructions in the peephole (addis and load), so we want to
25634 check if the target register was not used in the memory address and the
25635 register to hold the addis result is dead after the peephole. */
25636 if (REGNO (addis_reg) != REGNO (target))
25637 {
25638 if (reg_mentioned_p (target, mem))
25639 return false;
25640
25641 if (!peep2_reg_dead_p (2, addis_reg))
25642 return false;
25643
25644 /* If the target register being loaded is the stack pointer, we must
25645 avoid loading any other value into it, even temporarily. */
25646 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
25647 return false;
25648 }
25649
25650 base_reg = XEXP (addr, 0);
25651 return REGNO (addis_reg) == REGNO (base_reg);
25652 }
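
/* The candidate sequence looks roughly like:

	addis 10,2,sym@toc@ha
	lwz   10,sym@toc@l(10)

   where the ADDIS result register is also the load's base and target,
   allowing a power8 to fuse the pair.  */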
25653
25654 /* During the peephole2 pass, adjust and expand the insns for a load fusion
25655 sequence. We adjust the addis register to use the target register. If the
25656 load sign extends, we adjust the code to use a zero-extending load followed
25657 by an explicit sign extension, since the fusion only covers zero-extending
25658 loads.
25659
25660 The operands are:
25661 operands[0] register set with addis (to be replaced with target)
25662 operands[1] value set via addis
25663 operands[2] target register being loaded
25664 operands[3] D-form memory reference using operands[0]. */
25665
25666 void
25667 expand_fusion_gpr_load (rtx *operands)
25668 {
25669 rtx addis_value = operands[1];
25670 rtx target = operands[2];
25671 rtx orig_mem = operands[3];
25672 rtx new_addr, new_mem, orig_addr, offset;
25673 enum rtx_code plus_or_lo_sum;
25674 machine_mode target_mode = GET_MODE (target);
25675 machine_mode extend_mode = target_mode;
25676 machine_mode ptr_mode = Pmode;
25677 enum rtx_code extend = UNKNOWN;
25678
25679 if (GET_CODE (orig_mem) == ZERO_EXTEND
25680 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
25681 {
25682 extend = GET_CODE (orig_mem);
25683 orig_mem = XEXP (orig_mem, 0);
25684 target_mode = GET_MODE (orig_mem);
25685 }
25686
25687 gcc_assert (MEM_P (orig_mem));
25688
25689 orig_addr = XEXP (orig_mem, 0);
25690 plus_or_lo_sum = GET_CODE (orig_addr);
25691 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
25692
25693 offset = XEXP (orig_addr, 1);
25694 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
25695 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
25696
25697 if (extend != UNKNOWN)
25698 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
25699
25700 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
25701 UNSPEC_FUSION_GPR);
25702 emit_insn (gen_rtx_SET (target, new_mem));
25703
25704 if (extend == SIGN_EXTEND)
25705 {
25706 int sub_off = ((BYTES_BIG_ENDIAN)
25707 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
25708 : 0);
25709 rtx sign_reg
25710 = simplify_subreg (target_mode, target, extend_mode, sub_off);
25711
25712 emit_insn (gen_rtx_SET (target,
25713 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
25714 }
25715
25716 return;
25717 }
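
/* For the sign-extending case (illustrative): a fused HImode load that
   must sign extend is emitted as the zero-extending LHZ followed by an
   explicit EXTSH of the loaded register, since only zero-extending loads
   participate in the fusion.  */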
25718
25719 /* Emit the addis instruction that will be part of a fused instruction
25720 sequence. */
25721
25722 void
25723 emit_fusion_addis (rtx target, rtx addis_value)
25724 {
25725 rtx fuse_ops[10];
25726 const char *addis_str = NULL;
25727
25728 /* Emit the addis instruction. */
25729 fuse_ops[0] = target;
25730 if (satisfies_constraint_L (addis_value))
25731 {
25732 fuse_ops[1] = addis_value;
25733 addis_str = "lis %0,%v1";
25734 }
25735
25736 else if (GET_CODE (addis_value) == PLUS)
25737 {
25738 rtx op0 = XEXP (addis_value, 0);
25739 rtx op1 = XEXP (addis_value, 1);
25740
25741 if (REG_P (op0) && CONST_INT_P (op1)
25742 && satisfies_constraint_L (op1))
25743 {
25744 fuse_ops[1] = op0;
25745 fuse_ops[2] = op1;
25746 addis_str = "addis %0,%1,%v2";
25747 }
25748 }
25749
25750 else if (GET_CODE (addis_value) == HIGH)
25751 {
25752 rtx value = XEXP (addis_value, 0);
25753 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
25754 {
25755 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
25756 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
25757 if (TARGET_ELF)
25758 addis_str = "addis %0,%2,%1@toc@ha";
25759
25760 else if (TARGET_XCOFF)
25761 addis_str = "addis %0,%1@u(%2)";
25762
25763 else
25764 gcc_unreachable ();
25765 }
25766
25767 else if (GET_CODE (value) == PLUS)
25768 {
25769 rtx op0 = XEXP (value, 0);
25770 rtx op1 = XEXP (value, 1);
25771
25772 if (GET_CODE (op0) == UNSPEC
25773 && XINT (op0, 1) == UNSPEC_TOCREL
25774 && CONST_INT_P (op1))
25775 {
25776 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
25777 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
25778 fuse_ops[3] = op1;
25779 if (TARGET_ELF)
25780 addis_str = "addis %0,%2,%1+%3@toc@ha";
25781
25782 else if (TARGET_XCOFF)
25783 addis_str = "addis %0,%1+%3@u(%2)";
25784
25785 else
25786 gcc_unreachable ();
25787 }
25788 }
25789
25790 else if (satisfies_constraint_L (value))
25791 {
25792 fuse_ops[1] = value;
25793 addis_str = "lis %0,%v1";
25794 }
25795
25796 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
25797 {
25798 fuse_ops[1] = value;
25799 addis_str = "lis %0,%1@ha";
25800 }
25801 }
25802
25803 if (!addis_str)
25804 fatal_insn ("Could not generate addis value for fusion", addis_value);
25805
25806 output_asm_insn (addis_str, fuse_ops);
25807 }
25808
25809 /* Emit a D-form load or store instruction that is the second instruction
25810 of a fusion sequence. */
25811
25812 static void
25813 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
25814 {
25815 rtx fuse_ops[10];
25816 char insn_template[80];
25817
25818 fuse_ops[0] = load_reg;
25819 fuse_ops[1] = addis_reg;
25820
25821 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
25822 {
25823 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
25824 fuse_ops[2] = offset;
25825 output_asm_insn (insn_template, fuse_ops);
25826 }
25827
25828 else if (GET_CODE (offset) == UNSPEC
25829 && XINT (offset, 1) == UNSPEC_TOCREL)
25830 {
25831 if (TARGET_ELF)
25832 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
25833
25834 else if (TARGET_XCOFF)
25835 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25836
25837 else
25838 gcc_unreachable ();
25839
25840 fuse_ops[2] = XVECEXP (offset, 0, 0);
25841 output_asm_insn (insn_template, fuse_ops);
25842 }
25843
25844 else if (GET_CODE (offset) == PLUS
25845 && GET_CODE (XEXP (offset, 0)) == UNSPEC
25846 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
25847 && CONST_INT_P (XEXP (offset, 1)))
25848 {
25849 rtx tocrel_unspec = XEXP (offset, 0);
25850 if (TARGET_ELF)
25851 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
25852
25853 else if (TARGET_XCOFF)
25854 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
25855
25856 else
25857 gcc_unreachable ();
25858
25859 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
25860 fuse_ops[3] = XEXP (offset, 1);
25861 output_asm_insn (insn_template, fuse_ops);
25862 }
25863
25864 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
25865 {
25866 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25867
25868 fuse_ops[2] = offset;
25869 output_asm_insn (insn_template, fuse_ops);
25870 }
25871
25872 else
25873 fatal_insn ("Unable to generate load/store offset for fusion", offset);
25874
25875 return;
25876 }
25877
25878 /* Given an address, convert it into the addis and load offset parts. Addresses
25879 created during the peephole2 process look like:
25880 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
25881 (unspec [(...)] UNSPEC_TOCREL)) */
25882
25883 static void
25884 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
25885 {
25886 rtx hi, lo;
25887
25888 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
25889 {
25890 hi = XEXP (addr, 0);
25891 lo = XEXP (addr, 1);
25892 }
25893 else
25894 gcc_unreachable ();
25895
25896 *p_hi = hi;
25897 *p_lo = lo;
25898 }
25899
25900 /* Return a string to fuse an addis instruction with a gpr load to the same
25901 register that the addis instruction set up. The address that is used
25902 is the logical address that was formed during peephole2:
25903 (lo_sum (high) (low-part))
25904
25905 The code is complicated, so we call output_asm_insn directly, and just
25906 return "". */
25907
25908 const char *
25909 emit_fusion_gpr_load (rtx target, rtx mem)
25910 {
25911 rtx addis_value;
25912 rtx addr;
25913 rtx load_offset;
25914 const char *load_str = NULL;
25915 machine_mode mode;
25916
25917 if (GET_CODE (mem) == ZERO_EXTEND)
25918 mem = XEXP (mem, 0);
25919
25920 gcc_assert (REG_P (target) && MEM_P (mem));
25921
25922 addr = XEXP (mem, 0);
25923 fusion_split_address (addr, &addis_value, &load_offset);
25924
25925 /* Now emit the load instruction to the same register. */
25926 mode = GET_MODE (mem);
25927 switch (mode)
25928 {
25929 case E_QImode:
25930 load_str = "lbz";
25931 break;
25932
25933 case E_HImode:
25934 load_str = "lhz";
25935 break;
25936
25937 case E_SImode:
25938 case E_SFmode:
25939 load_str = "lwz";
25940 break;
25941
25942 case E_DImode:
25943 case E_DFmode:
25944 gcc_assert (TARGET_POWERPC64);
25945 load_str = "ld";
25946 break;
25947
25948 default:
25949 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
25950 }
25951
25952 /* Emit the addis instruction. */
25953 emit_fusion_addis (target, addis_value);
25954
25955 /* Emit the D-form load instruction. */
25956 emit_fusion_load (target, target, load_offset, load_str);
25957
25958 return "";
25959 }
25960 \f
25961
25962 #ifdef RS6000_GLIBC_ATOMIC_FENV
25963 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
25964 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
25965 #endif
25966
25967 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
25968
25969 static void
25970 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
25971 {
25972 if (!TARGET_HARD_FLOAT)
25973 {
25974 #ifdef RS6000_GLIBC_ATOMIC_FENV
25975 if (atomic_hold_decl == NULL_TREE)
25976 {
25977 atomic_hold_decl
25978 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25979 get_identifier ("__atomic_feholdexcept"),
25980 build_function_type_list (void_type_node,
25981 double_ptr_type_node,
25982 NULL_TREE));
25983 TREE_PUBLIC (atomic_hold_decl) = 1;
25984 DECL_EXTERNAL (atomic_hold_decl) = 1;
25985 }
25986
25987 if (atomic_clear_decl == NULL_TREE)
25988 {
25989 atomic_clear_decl
25990 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25991 get_identifier ("__atomic_feclearexcept"),
25992 build_function_type_list (void_type_node,
25993 NULL_TREE));
25994 TREE_PUBLIC (atomic_clear_decl) = 1;
25995 DECL_EXTERNAL (atomic_clear_decl) = 1;
25996 }
25997
25998 tree const_double = build_qualified_type (double_type_node,
25999 TYPE_QUAL_CONST);
26000 tree const_double_ptr = build_pointer_type (const_double);
26001 if (atomic_update_decl == NULL_TREE)
26002 {
26003 atomic_update_decl
26004 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26005 get_identifier ("__atomic_feupdateenv"),
26006 build_function_type_list (void_type_node,
26007 const_double_ptr,
26008 NULL_TREE));
26009 TREE_PUBLIC (atomic_update_decl) = 1;
26010 DECL_EXTERNAL (atomic_update_decl) = 1;
26011 }
26012
26013 tree fenv_var = create_tmp_var_raw (double_type_node);
26014 TREE_ADDRESSABLE (fenv_var) = 1;
26015 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
26016 build4 (TARGET_EXPR, double_type_node, fenv_var,
26017 void_node, NULL_TREE, NULL_TREE));
26018
26019 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
26020 *clear = build_call_expr (atomic_clear_decl, 0);
26021 *update = build_call_expr (atomic_update_decl, 1,
26022 fold_convert (const_double_ptr, fenv_addr));
26023 #endif
26024 return;
26025 }
26026
26027 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
26028 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
26029 tree call_mffs = build_call_expr (mffs, 0);
26030
26031 /* Generates the equivalent of feholdexcept (&fenv_var)
26032
26033 *fenv_var = __builtin_mffs ();
26034 double fenv_hold;
26035 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
26036 __builtin_mtfsf (0xff, fenv_hold); */
26037
26038 /* Mask to clear everything except for the rounding modes and non-IEEE
26039 arithmetic flag. */
26040 const unsigned HOST_WIDE_INT hold_exception_mask
26041 = HOST_WIDE_INT_C (0xffffffff00000007);
26042
26043 tree fenv_var = create_tmp_var_raw (double_type_node);
26044
26045 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
26046 NULL_TREE, NULL_TREE);
26047
26048 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
26049 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26050 build_int_cst (uint64_type_node,
26051 hold_exception_mask));
26052
26053 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26054 fenv_llu_and);
26055
26056 tree hold_mtfsf = build_call_expr (mtfsf, 2,
26057 build_int_cst (unsigned_type_node, 0xff),
26058 fenv_hold_mtfsf);
26059
26060 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
26061
26062 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
26063
26064 double fenv_clear = __builtin_mffs ();
26065 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
26066 __builtin_mtfsf (0xff, fenv_clear); */
26067
26068 /* Mask to clear the entire lower word of the FPSCR image, i.e. all of the
26069 exception status, enable, and mode bits. */
26070 const unsigned HOST_WIDE_INT clear_exception_mask
26071 = HOST_WIDE_INT_C (0xffffffff00000000);
26072
26073 tree fenv_clear = create_tmp_var_raw (double_type_node);
26074
26075 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
26076 call_mffs, NULL_TREE, NULL_TREE);
26077
26078 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
26079 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
26080 fenv_clean_llu,
26081 build_int_cst (uint64_type_node,
26082 clear_exception_mask));
26083
26084 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26085 fenv_clear_llu_and);
26086
26087 tree clear_mtfsf = build_call_expr (mtfsf, 2,
26088 build_int_cst (unsigned_type_node, 0xff),
26089 fenv_clear_mtfsf);
26090
26091 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
26092
26093 /* Generates the equivalent of feupdateenv (&fenv_var)
26094
26095 double old_fenv = __builtin_mffs ();
26096 double fenv_update;
26097 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
26098 (*(uint64_t*)fenv_var & 0x1ff80fff);
26099 __builtin_mtfsf (0xff, fenv_update); */
26100
26101 const unsigned HOST_WIDE_INT update_exception_mask
26102 = HOST_WIDE_INT_C (0xffffffff1fffff00);
26103 const unsigned HOST_WIDE_INT new_exception_mask
26104 = HOST_WIDE_INT_C (0x1ff80fff);
26105
26106 tree old_fenv = create_tmp_var_raw (double_type_node);
26107 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
26108 call_mffs, NULL_TREE, NULL_TREE);
26109
26110 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
26111 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
26112 build_int_cst (uint64_type_node,
26113 update_exception_mask));
26114
26115 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26116 build_int_cst (uint64_type_node,
26117 new_exception_mask));
26118
26119 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
26120 old_llu_and, new_llu_and);
26121
26122 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26123 new_llu_mask);
26124
26125 tree update_mtfsf = build_call_expr (mtfsf, 2,
26126 build_int_cst (unsigned_type_node, 0xff),
26127 fenv_update_mtfsf);
26128
26129 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
26130 }
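
/* The HOLD/CLEAR/UPDATE sequences built above bracket the compare-and-swap
   loop used for an atomic floating-point compound assignment; roughly
   (illustrative), "_Atomic double d; d *= 2.0;" expands to HOLD, then
   repeat { CLEAR; new = old * 2.0; } until the compare-and-swap succeeds,
   then UPDATE.  */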
26131
26132 void
26133 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
26134 {
26135 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26136
26137 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26138 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26139
26140 /* The destination of the vmrgew instruction layout is:
26141 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26142 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26143 vmrgew instruction will be correct. */
26144 if (BYTES_BIG_ENDIAN)
26145 {
26146 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
26147 GEN_INT (0)));
26148 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
26149 GEN_INT (3)));
26150 }
26151 else
26152 {
26153 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
26154 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
26155 }
26156
26157 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26158 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26159
26160 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
26161 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
26162
26163 if (BYTES_BIG_ENDIAN)
26164 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26165 else
26166 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26167 }
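
/* Net effect (illustrative): the result holds the four narrowed values in
   order, dst = { (float) src1[0], (float) src1[1],
		  (float) src2[0], (float) src2[1] }.  */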
26168
26169 void
26170 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
26171 {
26172 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26173
26174 rtx_tmp0 = gen_reg_rtx (V2DImode);
26175 rtx_tmp1 = gen_reg_rtx (V2DImode);
26176
26177 /* The destination of the vmrgew instruction layout is:
26178 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26179 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26180 vmrgew instruction will be correct. */
26181 if (BYTES_BIG_ENDIAN)
26182 {
26183 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
26184 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
26185 }
26186 else
26187 {
26188 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
26189 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
26190 }
26191
26192 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26193 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26194
26195 if (signed_convert)
26196 {
26197 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
26198 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
26199 }
26200 else
26201 {
26202 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
26203 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
26204 }
26205
26206 if (BYTES_BIG_ENDIAN)
26207 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26208 else
26209 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26210 }
26211
26212 void
26213 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
26214 rtx src2)
26215 {
26216 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26217
26218 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26219 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26220
26221 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
26222 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
26223
26224 rtx_tmp2 = gen_reg_rtx (V4SImode);
26225 rtx_tmp3 = gen_reg_rtx (V4SImode);
26226
26227 if (signed_convert)
26228 {
26229 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
26230 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
26231 }
26232 else
26233 {
26234 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
26235 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
26236 }
26237
26238 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
26239 }
26240
26241 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
26242
26243 static bool
26244 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
26245 optimization_type opt_type)
26246 {
26247 switch (op)
26248 {
26249 case rsqrt_optab:
26250 return (opt_type == OPTIMIZE_FOR_SPEED
26251 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
26252
26253 default:
26254 return true;
26255 }
26256 }
26257
26258 /* Implement TARGET_CONSTANT_ALIGNMENT. */
26259
26260 static HOST_WIDE_INT
26261 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
26262 {
26263 if (TREE_CODE (exp) == STRING_CST
26264 && (STRICT_ALIGNMENT || !optimize_size))
26265 return MAX (align, BITS_PER_WORD);
26266 return align;
26267 }
26268
26269 /* Implement TARGET_STARTING_FRAME_OFFSET. */
26270
26271 static HOST_WIDE_INT
26272 rs6000_starting_frame_offset (void)
26273 {
26274 if (FRAME_GROWS_DOWNWARD)
26275 return 0;
26276 return RS6000_STARTING_FRAME_OFFSET;
26277 }
26278 \f
26279
26280 /* Create an alias for a mangled name where we have changed the mangling (in
26281 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
26282 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
26283
26284 #if TARGET_ELF && RS6000_WEAK
26285 static void
26286 rs6000_globalize_decl_name (FILE * stream, tree decl)
26287 {
26288 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
26289
26290 targetm.asm_out.globalize_label (stream, name);
26291
26292 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
26293 {
26294 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
26295 const char *old_name;
26296
26297 ieee128_mangling_gcc_8_1 = true;
26298 lang_hooks.set_decl_assembler_name (decl);
26299 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
26300 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
26301 ieee128_mangling_gcc_8_1 = false;
26302
26303 if (strcmp (name, old_name) != 0)
26304 {
26305 fprintf (stream, "\t.weak %s\n", old_name);
26306 fprintf (stream, "\t.set %s,%s\n", old_name, name);
26307 }
26308 }
26309 }
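
/* For example (illustrative): for a function mangled _Z1fu9__ieee128 under
   the current scheme, this emits an alias under the GCC 8.1 mangling:

	.weak _Z1fU10__float128
	.set _Z1fU10__float128,_Z1fu9__ieee128

   so references compiled against the old mangling still link.  */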
26310 #endif
26311
26312 \f
26313 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
26314 function names from <foo>l to <foo>f128 if the default long double type is
26315 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
26316 include file switches the names on systems that support long double as IEEE
26317 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
26318 In the future, glibc will export names like __ieee128_sinf128 and we can
26319 switch to using those instead of using sinf128, which pollutes the user's
26320 namespace.
26321
26322 This will switch the names for Fortran math functions as well (which doesn't
26323 use math.h). However, Fortran needs other changes to the compiler and
26324 library before you can switch the real*16 type at compile time.
26325
26326 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
26327 only do this if the default is that long double is IBM extended double, and
26328 the user asked for IEEE 128-bit. */
26329
26330 static tree
26331 rs6000_mangle_decl_assembler_name (tree decl, tree id)
26332 {
26333 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
26334 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl))
26335 {
26336 size_t len = IDENTIFIER_LENGTH (id);
26337 const char *name = IDENTIFIER_POINTER (id);
26338
26339 if (name[len - 1] == 'l')
26340 {
26341 bool uses_ieee128_p = false;
26342 tree type = TREE_TYPE (decl);
26343 machine_mode ret_mode = TYPE_MODE (type);
26344
26345 /* See if the function returns an IEEE 128-bit floating point type or
26346 complex type. */
26347 if (ret_mode == TFmode || ret_mode == TCmode)
26348 uses_ieee128_p = true;
26349 else
26350 {
26351 function_args_iterator args_iter;
26352 tree arg;
26353
26354 /* See if the function passes an IEEE 128-bit floating point type
26355 or complex type. */
26356 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
26357 {
26358 machine_mode arg_mode = TYPE_MODE (arg);
26359 if (arg_mode == TFmode || arg_mode == TCmode)
26360 {
26361 uses_ieee128_p = true;
26362 break;
26363 }
26364 }
26365 }
26366
26367 /* If we passed or returned an IEEE 128-bit floating point type,
26368 change the name. */
26369 if (uses_ieee128_p)
26370 {
26371 char *name2 = (char *) alloca (len + 4);
26372 memcpy (name2, name, len - 1);
26373 strcpy (name2 + len - 1, "f128");
26374 id = get_identifier (name2);
26375 }
26376 }
26377 }
26378
26379 return id;
26380 }
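
/* For example (illustrative): when the default long double is IBM extended
   double but the user compiled with -mabi=ieeelongdouble, __builtin_sinl
   takes and returns a TFmode value, so the trailing 'l' of "sinl" is
   rewritten to give "sinf128".  */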
26381
26382 /* Predict whether the given loop in gimple will be transformed in the RTL
26383 doloop_optimize pass. */
26384
26385 static bool
26386 rs6000_predict_doloop_p (struct loop *loop)
26387 {
26388 gcc_assert (loop);
26389
26390 /* On rs6000, targetm.can_use_doloop_p is actually
26391 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
26392 if (loop->inner != NULL)
26393 {
26394 if (dump_file && (dump_flags & TDF_DETAILS))
26395 fprintf (dump_file, "Predict doloop failure due to"
26396 " loop nesting.\n");
26397 return false;
26398 }
26399
26400 return true;
26401 }
26402
26403 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
26404
26405 static bool
26406 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
26407 {
26408 gcc_assert (MEM_P (mem));
26409
26410 /* curr_insn_transform()'s handling of subregs cannot handle Altivec
26411 AND-masked addresses, so don't allow MEMs with those address types to be
26412 substituted as an equivalent expression. See PR93974 for details. */
26413 if (GET_CODE (XEXP (mem, 0)) == AND)
26414 return true;
26415
26416 return false;
26417 }
26418
26419 struct gcc_target targetm = TARGET_INITIALIZER;
26420
26421 #include "gt-rs6000.h"