/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2019 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "print-tree.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "sched-int.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "tree-vector-builder.h"
#include "tree-pass.h"
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#include "gstab.h"  /* for N_SLINE */
#include "case-cfn-macros.h"
#include "tree-ssa-propagate.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"

/* This file should be included last.  */
#include "target-def.h"
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif
static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;
/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
static bool rs6000_passes_ieee128;

/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built-in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;
static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

extern GTY(()) section *toc_section;
section *toc_section = 0;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	(RECIP_ALL) },
  { "none",	(RECIP_NONE) },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static struct
{
  const char *hwcap;
  int mask;
  unsigned int id;		/* Which AT_HWCAP to use.  */
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "htm-no-suspend",	PPC_FEATURE2_HTM_NO_SUSPEND,	1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 },
  { "darn",		PPC_FEATURE2_DARN,		1 },
  { "scv",		PPC_FEATURE2_SCV,		1 }
};
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,		/* default clone.  */
  CLONE_ISA_2_05,		/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,		/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,		/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,		/* ISA 3.00 (power9).  */
  CLONE_MAX
};
/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask.  */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.00 (power9).  */
};
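/* These clone entries correspond to the PowerPC spellings accepted by the
   target_clones function attribute.  A minimal user-level example (foo is a
   hypothetical function):

     __attribute__ ((target_clones ("cpu=power9", "default")))
     int foo (void) { return 42; }

   GCC then emits one clone per listed option plus the ifunc dispatcher that
   rs6000_generate_version_dispatcher_body builds.  */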
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
static bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
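/* For example, IS_STD_REG_TYPE (ALTIVEC_REG_TYPE) is true, since
   ALTIVEC_REG_TYPE lies between GPR_REG_TYPE and FPR_REG_TYPE in the
   enumeration above, while IS_STD_REG_TYPE (CR_REG_TYPE) is false.  */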
/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   3 classes.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* Quad offset is limited.  */
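/* To illustrate how the bits compose (the combination below is made up, not
   a claim about any particular mode): a mode that is valid in GPRs and
   supports both reg+reg and reg+offset addressing there would carry

     addr_mask[RELOAD_REG_GPR]
       == (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET)

   i.e. 0x0d.  */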
/* Masks of valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
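/* A sketch of the kind of check these predicates enable (illustrative
   only): DQ-form instructions such as lxv/stxv require the displacement to
   be a multiple of 16, so an address validation routine can reject
   unaligned offsets with

     if (mode_supports_dq_form (mode) && (offset % 16) != 0)
       return false;
   */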
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

bool
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;

		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }

  return store_data_bypass_p (out_insn, in_insn);
}
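/* This predicate is referenced by name from define_bypass entries in the
   scheduling descriptions, along the lines of (the unit names here are
   illustrative, not quoted from a .md file):

     (define_bypass 2 "power9-fp" "power9-store" "rs6000_store_data_bypass_p")

   so an insn pair whose shape the generic code cannot handle merely
   disables the bypass instead of triggering an assertion.  */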
/* Processor costs (relative to an add).  */
const struct processor_costs *rs6000_cost;
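/* COSTS_N_INSNS (N) converts an instruction count into the cost units the
   middle end uses (N * 4 in current GCC, see rtl.h), so an entry such as
   COSTS_N_INSNS (18) for divsi rates a 32-bit divide at roughly eighteen
   times the cost of an integer add.  */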
/* Instruction size costs on 32-bit processors.  */
static struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  0,				/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_X
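/* With the definitions that were in effect for the #include above, every
   entry in rs6000-builtin.def expanded to one initializer of
   rs6000_builtin_info.  For a hypothetical definition

     RS6000_BUILTIN_2 (FOO, "__builtin_foo", MASK, ATTR, CODE_FOR_foo)

   the table gains the element

     { "__builtin_foo", CODE_FOR_foo, MASK, ATTR },

   matching the field order of rs6000_builtin_info_type.  The #undefs above
   let later code rebind the same macros for other expansions of the .def
   file.  */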
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void htm_init_builtins (void);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);
static rtx rs6000_internal_arg_pointer (void);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",

  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",

  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",

  "lr", "ctr", "ca", "ap",

  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",

  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",

  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",

  "lr", "ctr", "ca", "ap",

  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};
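/* For example, user source can force a call to go through a pointer (so it
   can reach a distant destination) with

     void bar (void) __attribute__ ((longcall));

   and can request an AltiVec vector type with
   __attribute__ ((altivec (vector__))); bar is a hypothetical function.  */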
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit machines.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  rs6000_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  rs6000_get_function_versions_dispatcher
1872 #undef TARGET_OPTION_FUNCTION_VERSIONS
1873 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1875 #undef TARGET_HARD_REGNO_NREGS
1876 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1877 #undef TARGET_HARD_REGNO_MODE_OK
1878 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1880 #undef TARGET_MODES_TIEABLE_P
1881 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1883 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1884 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1885 rs6000_hard_regno_call_part_clobbered
1887 #undef TARGET_SLOW_UNALIGNED_ACCESS
1888 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1890 #undef TARGET_CAN_CHANGE_MODE_CLASS
1891 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1893 #undef TARGET_CONSTANT_ALIGNMENT
1894 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1896 #undef TARGET_STARTING_FRAME_OFFSET
1897 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1899 #if TARGET_ELF && RS6000_WEAK
1900 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1901 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1904 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1905 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1907 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1908 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1911 /* Processor table. */
1914 const char *const name; /* Canonical processor name. */
1915 const enum processor_type processor; /* Processor type enum value. */
1916 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1919 static struct rs6000_ptt const processor_target_table[] =
1921 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1922 #include "rs6000-cpus.def"
/* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
   name is invalid.  */
1930 rs6000_cpu_name_lookup (const char *name)
1936 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1937 if (! strcmp (name, processor_target_table[i].name))
/* Return the number of consecutive hard regs needed starting at reg REGNO
   to hold something of mode MODE.
   This is ordinarily the length in words of a value of mode MODE
   but can be less for certain modes in special long registers.

   POWER and PowerPC GPRs hold 32 bits worth;
   PowerPC64 GPRs and FPRs hold 64 bits worth.  */
1954 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1956 unsigned HOST_WIDE_INT reg_size;
1958 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1959 128-bit floating point that can go in vector registers, which has VSX
1960 memory addressing. */
1961 if (FP_REGNO_P (regno))
1962 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1963 ? UNITS_PER_VSX_WORD
1964 : UNITS_PER_FP_WORD);
1966 else if (ALTIVEC_REGNO_P (regno))
1967 reg_size = UNITS_PER_ALTIVEC_WORD;
1970 reg_size = UNITS_PER_WORD;
1972 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
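/* A worked example of the computation above: on a 64-bit target, TFmode
   (16 bytes) needs (16 + 8 - 1) / 8 == 2 FPRs when reg_size is
   UNITS_PER_FP_WORD (8), but only (16 + 16 - 1) / 16 == 1 register when
   the value can live in a full VSX register (UNITS_PER_VSX_WORD == 16).  */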
/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.  */
1978 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1980 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1982 if (COMPLEX_MODE_P (mode))
1983 mode = GET_MODE_INNER (mode);
/* PTImode can only go in GPRs.  Quad word memory operations require even/odd
   register combinations, and we use PTImode where we need to deal with quad
   word memory operations.  Don't allow quad words in the argument or frame
   pointer registers, just registers 0..31.  */
1989 if (mode == PTImode)
1990 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1991 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1992 && ((regno & 1) == 0));
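/* For example, the quad word load/store instructions (lq/stq) behind
   PTImode require an even/odd GPR pair such as r10/r11; a PTImode value
   starting in an odd register like r11 fails the (regno & 1) test.  */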
/* VSX registers that overlap the FPR registers are larger than for non-VSX
   implementations.  Don't allow an item to be split between a FP register
   and an Altivec register.  Allow TImode in all VSX registers if the user
   asked for it.  */
1998 if (TARGET_VSX && VSX_REGNO_P (regno)
1999 && (VECTOR_MEM_VSX_P (mode)
2000 || FLOAT128_VECTOR_P (mode)
2001 || reg_addr[mode].scalar_in_vmx_p
2003 || (TARGET_VADDUQM && mode == V1TImode)))
2005 if (FP_REGNO_P (regno))
2006 return FP_REGNO_P (last_regno);
2008 if (ALTIVEC_REGNO_P (regno))
2010 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2013 return ALTIVEC_REGNO_P (last_regno);
2017 /* The GPRs can hold any mode, but values bigger than one register
2018 cannot go past R31. */
2019 if (INT_REGNO_P (regno))
2020 return INT_REGNO_P (last_regno);
2022 /* The float registers (except for VSX vector modes) can only hold floating
2023 modes and DImode. */
2024 if (FP_REGNO_P (regno))
2026 if (FLOAT128_VECTOR_P (mode))
2029 if (SCALAR_FLOAT_MODE_P (mode)
2030 && (mode != TDmode || (regno % 2) == 0)
2031 && FP_REGNO_P (last_regno))
2034 if (GET_MODE_CLASS (mode) == MODE_INT)
if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2039 if (TARGET_P8_VECTOR && (mode == SImode))
2042 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
2049 /* The CR register can only hold CC modes. */
2050 if (CR_REGNO_P (regno))
2051 return GET_MODE_CLASS (mode) == MODE_CC;
2053 if (CA_REGNO_P (regno))
2054 return mode == Pmode || mode == SImode;
/* AltiVec vectors go only in AltiVec registers.  */
2057 if (ALTIVEC_REGNO_P (regno))
2058 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2059 || mode == V1TImode);
/* We cannot put non-VSX TImode or PTImode anywhere except general registers,
   and it must be able to fit within the register set.  */
2064 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2067 /* Implement TARGET_HARD_REGNO_NREGS. */
2070 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2072 return rs6000_hard_regno_nregs[mode][regno];
2075 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2078 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2080 return rs6000_hard_regno_mode_ok_p[mode][regno];
2083 /* Implement TARGET_MODES_TIEABLE_P.
2085 PTImode cannot tie with other modes because PTImode is restricted to even
2086 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2089 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2090 128-bit floating point on VSX systems ties with other vectors. */
2093 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2095 if (mode1 == PTImode)
2096 return mode2 == PTImode;
2097 if (mode2 == PTImode)
2100 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2101 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2102 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2105 if (SCALAR_FLOAT_MODE_P (mode1))
2106 return SCALAR_FLOAT_MODE_P (mode2);
2107 if (SCALAR_FLOAT_MODE_P (mode2))
2110 if (GET_MODE_CLASS (mode1) == MODE_CC)
2111 return GET_MODE_CLASS (mode2) == MODE_CC;
2112 if (GET_MODE_CLASS (mode2) == MODE_CC)
2118 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2121 rs6000_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED,
2122 unsigned int regno, machine_mode mode)
2126 && GET_MODE_SIZE (mode) > 4
2127 && INT_REGNO_P (regno))
2131 && FP_REGNO_P (regno)
2132 && GET_MODE_SIZE (mode) > 8
2133 && !FLOAT128_2REG_P (mode))
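/* For example (an illustrative reading of the tests above): with -m32 on
   a 64-bit processor only the low 32 bits of a GPR are preserved across
   calls, so a DImode value is partially clobbered; likewise an IEEE
   128-bit value in a single FPR only has its low 64 bits preserved.  */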
2139 /* Print interesting facts about registers. */
2141 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2145 for (r = first_regno; r <= last_regno; ++r)
2147 const char *comma = "";
2150 if (first_regno == last_regno)
2151 fprintf (stderr, "%s:\t", reg_name);
2153 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2156 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2157 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2161 fprintf (stderr, ",\n\t");
2166 if (rs6000_hard_regno_nregs[m][r] > 1)
2167 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2168 rs6000_hard_regno_nregs[m][r]);
2170 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2175 if (call_used_regs[r])
2179 fprintf (stderr, ",\n\t");
2184 len += fprintf (stderr, "%s%s", comma, "call-used");
2192 fprintf (stderr, ",\n\t");
2197 len += fprintf (stderr, "%s%s", comma, "fixed");
2203 fprintf (stderr, ",\n\t");
2207 len += fprintf (stderr, "%sreg-class = %s", comma,
2208 reg_class_names[(int)rs6000_regno_regclass[r]]);
2213 fprintf (stderr, ",\n\t");
2217 fprintf (stderr, "%sregno = %d\n", comma, r);
2222 rs6000_debug_vector_unit (enum rs6000_vector v)
2228 case VECTOR_NONE: ret = "none"; break;
2229 case VECTOR_ALTIVEC: ret = "altivec"; break;
2230 case VECTOR_VSX: ret = "vsx"; break;
2231 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2232 default: ret = "unknown"; break;
/* Inner function printing just the address mask for a particular reload
   register class.  */
2240 DEBUG_FUNCTION char *
2241 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2246 if ((mask & RELOAD_REG_VALID) != 0)
2248 else if (keep_spaces)
2251 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2253 else if (keep_spaces)
2256 if ((mask & RELOAD_REG_INDEXED) != 0)
2258 else if (keep_spaces)
2261 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2263 else if ((mask & RELOAD_REG_OFFSET) != 0)
2265 else if (keep_spaces)
2268 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2270 else if (keep_spaces)
2273 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2275 else if (keep_spaces)
2278 if ((mask & RELOAD_REG_AND_M16) != 0)
2280 else if (keep_spaces)
/* Print the address masks in a human readable fashion.  */
2290 rs6000_debug_print_mode (ssize_t m)
2295 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2296 for (rc = 0; rc < N_RELOAD_REG; rc++)
2297 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2298 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2300 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2301 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2303 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2304 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2305 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2309 spaces += sizeof (" Reload=sl") - 1;
2311 if (reg_addr[m].scalar_in_vmx_p)
2313 fprintf (stderr, "%*s Upper=y", spaces, "");
2317 spaces += sizeof (" Upper=y") - 1;
2319 if (rs6000_vector_unit[m] != VECTOR_NONE
2320 || rs6000_vector_mem[m] != VECTOR_NONE)
2322 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2324 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2325 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2328 fputs ("\n", stderr);
2331 #define DEBUG_FMT_ID "%-32s= "
2332 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2333 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2334 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
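/* For example, fprintf (stderr, DEBUG_FMT_S, "abi", "ELFv2") prints the
   key left-justified in a 32-column field followed by "= ELFv2", which
   keeps the -mdebug=reg output aligned.  */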
2336 /* Print various interesting information with -mdebug=reg. */
2338 rs6000_debug_reg_global (void)
2340 static const char *const tf[2] = { "false", "true" };
2341 const char *nl = (const char *)0;
2344 char costly_num[20];
2346 char flags_buffer[40];
2347 const char *costly_str;
2348 const char *nop_str;
2349 const char *trace_str;
2350 const char *abi_str;
2351 const char *cmodel_str;
2352 struct cl_target_option cl_opts;
2354 /* Modes we want tieable information on. */
2355 static const machine_mode print_tieable_modes[] = {
2389 /* Virtual regs we are interested in. */
static const struct {
2391 int regno; /* register number. */
2392 const char *name; /* register name. */
2393 } virtual_regs[] = {
2394 { STACK_POINTER_REGNUM, "stack pointer:" },
2395 { TOC_REGNUM, "toc: " },
2396 { STATIC_CHAIN_REGNUM, "static chain: " },
2397 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2398 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2399 { ARG_POINTER_REGNUM, "arg pointer: " },
2400 { FRAME_POINTER_REGNUM, "frame pointer:" },
2401 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2402 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2403 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2404 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2405 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2406 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2407 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
{ VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2409 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2412 fputs ("\nHard register information:\n", stderr);
2413 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2414 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2415 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2418 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2419 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2420 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2421 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2422 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2423 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2425 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2426 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
fprintf (stderr, "%s regno = %3d\n",
	 virtual_regs[v].name, virtual_regs[v].regno);
2431 "d reg_class = %s\n"
2432 "f reg_class = %s\n"
2433 "v reg_class = %s\n"
2434 "wa reg_class = %s\n"
2435 "we reg_class = %s\n"
2436 "wr reg_class = %s\n"
2437 "wx reg_class = %s\n"
2438 "wA reg_class = %s\n"
2440 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2441 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2442 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2443 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2444 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2445 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2446 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2447 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2450 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2451 rs6000_debug_print_mode (m);
2453 fputs ("\n", stderr);
2455 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2457 machine_mode mode1 = print_tieable_modes[m1];
2458 bool first_time = true;
2460 nl = (const char *)0;
2461 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2463 machine_mode mode2 = print_tieable_modes[m2];
2464 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2468 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2473 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2478 fputs ("\n", stderr);
2484 if (rs6000_recip_control)
2486 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2488 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2489 if (rs6000_recip_bits[m])
2492 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2494 (RS6000_RECIP_AUTO_RE_P (m)
2496 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2497 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2499 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2502 fputs ("\n", stderr);
2505 if (rs6000_cpu_index >= 0)
2507 const char *name = processor_target_table[rs6000_cpu_index].name;
2509 = processor_target_table[rs6000_cpu_index].target_enable;
2511 sprintf (flags_buffer, "-mcpu=%s flags", name);
2512 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2515 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2517 if (rs6000_tune_index >= 0)
2519 const char *name = processor_target_table[rs6000_tune_index].name;
2521 = processor_target_table[rs6000_tune_index].target_enable;
2523 sprintf (flags_buffer, "-mtune=%s flags", name);
2524 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2527 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2529 cl_target_option_save (&cl_opts, &global_options);
2530 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2533 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2534 rs6000_isa_flags_explicit);
2536 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2537 rs6000_builtin_mask);
2539 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2541 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2542 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2544 switch (rs6000_sched_costly_dep)
2546 case max_dep_latency:
2547 costly_str = "max_dep_latency";
2551 costly_str = "no_dep_costly";
2554 case all_deps_costly:
2555 costly_str = "all_deps_costly";
2558 case true_store_to_load_dep_costly:
2559 costly_str = "true_store_to_load_dep_costly";
2562 case store_to_load_dep_costly:
2563 costly_str = "store_to_load_dep_costly";
2567 costly_str = costly_num;
2568 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2572 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2574 switch (rs6000_sched_insert_nops)
2576 case sched_finish_regroup_exact:
2577 nop_str = "sched_finish_regroup_exact";
2580 case sched_finish_pad_groups:
2581 nop_str = "sched_finish_pad_groups";
2584 case sched_finish_none:
2585 nop_str = "sched_finish_none";
2590 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2594 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2596 switch (rs6000_sdata)
2603 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2607 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2611 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2616 switch (rs6000_traceback)
2618 case traceback_default: trace_str = "default"; break;
2619 case traceback_none: trace_str = "none"; break;
2620 case traceback_part: trace_str = "part"; break;
2621 case traceback_full: trace_str = "full"; break;
2622 default: trace_str = "unknown"; break;
2625 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2627 switch (rs6000_current_cmodel)
2629 case CMODEL_SMALL: cmodel_str = "small"; break;
2630 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2631 case CMODEL_LARGE: cmodel_str = "large"; break;
2632 default: cmodel_str = "unknown"; break;
2635 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2637 switch (rs6000_current_abi)
2639 case ABI_NONE: abi_str = "none"; break;
2640 case ABI_AIX: abi_str = "aix"; break;
2641 case ABI_ELFv2: abi_str = "ELFv2"; break;
2642 case ABI_V4: abi_str = "V4"; break;
2643 case ABI_DARWIN: abi_str = "darwin"; break;
2644 default: abi_str = "unknown"; break;
2647 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2649 if (rs6000_altivec_abi)
2650 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2652 if (rs6000_darwin64_abi)
2653 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2655 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2656 (TARGET_SOFT_FLOAT ? "true" : "false"));
2658 if (TARGET_LINK_STACK)
2659 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2661 if (TARGET_P8_FUSION)
2665 strcpy (options, "power8");
2666 if (TARGET_P8_FUSION_SIGN)
2667 strcat (options, ", sign");
2669 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2672 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2673 TARGET_SECURE_PLT ? "secure" : "bss");
2674 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2675 aix_struct_return ? "aix" : "sysv");
2676 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2677 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2678 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2679 tf[!!rs6000_align_branch_targets]);
2680 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2681 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2682 rs6000_long_double_type_size);
2683 if (rs6000_long_double_type_size > 64)
2685 fprintf (stderr, DEBUG_FMT_S, "long double type",
2686 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2687 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2688 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2690 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2691 (int)rs6000_sched_restricted_insns_priority);
2692 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2694 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2695 (int)RS6000_BUILTIN_COUNT);
2697 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2698 (int)TARGET_FLOAT128_ENABLE_TYPE);
2701 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2702 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2704 if (TARGET_DIRECT_MOVE_128)
2705 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2706 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
/* Update the addr mask bits in reg_addr to help secondary reload and the
   legitimate address support figure out the appropriate addressing to
   use.  */
2715 rs6000_setup_reg_addr_masks (void)
2717 ssize_t rc, reg, m, nregs;
2718 addr_mask_type any_addr_mask, addr_mask;
2720 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2722 machine_mode m2 = (machine_mode) m;
2723 bool complex_p = false;
2724 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2727 if (COMPLEX_MODE_P (m2))
2730 m2 = GET_MODE_INNER (m2);
2733 msize = GET_MODE_SIZE (m2);
/* SDmode is special in that we want to access it only via REG+REG
   addressing on power7 and above, since we want to use the LFIWZX and
   STFIWX instructions to load and store it.  */
2738 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2741 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2744 reg = reload_reg_map[rc].reg;
2746 /* Can mode values go in the GPR/FPR/Altivec registers? */
2747 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2749 bool small_int_vsx_p = (small_int_p
2750 && (rc == RELOAD_REG_FPR
2751 || rc == RELOAD_REG_VMX));
2753 nregs = rs6000_hard_regno_nregs[m][reg];
2754 addr_mask |= RELOAD_REG_VALID;
2756 /* Indicate if the mode takes more than 1 physical register. If
2757 it takes a single register, indicate it can do REG+REG
2758 addressing. Small integers in VSX registers can only do
2759 REG+REG addressing. */
2760 if (small_int_vsx_p)
2761 addr_mask |= RELOAD_REG_INDEXED;
2762 else if (nregs > 1 || m == BLKmode || complex_p)
2763 addr_mask |= RELOAD_REG_MULTIPLE;
2765 addr_mask |= RELOAD_REG_INDEXED;
2767 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2768 addressing. If we allow scalars into Altivec registers,
2769 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2771 For VSX systems, we don't allow update addressing for
2772 DFmode/SFmode if those registers can go in both the
2773 traditional floating point registers and Altivec registers.
2774 The load/store instructions for the Altivec registers do not
2775 have update forms. If we allowed update addressing, it seems
2776 to break IV-OPT code using floating point if the index type is
2777 int instead of long (PR target/81550 and target/84042). */
2780 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2782 && !VECTOR_MODE_P (m2)
2783 && !FLOAT128_VECTOR_P (m2)
2785 && (m != E_DFmode || !TARGET_VSX)
2786 && (m != E_SFmode || !TARGET_P8_VECTOR)
2787 && !small_int_vsx_p)
2789 addr_mask |= RELOAD_REG_PRE_INCDEC;
/* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
   we don't allow PRE_MODIFY for some multi-register
   operations.  */
2797 addr_mask |= RELOAD_REG_PRE_MODIFY;
2801 if (TARGET_POWERPC64)
2802 addr_mask |= RELOAD_REG_PRE_MODIFY;
2807 if (TARGET_HARD_FLOAT)
2808 addr_mask |= RELOAD_REG_PRE_MODIFY;
2814 /* GPR and FPR registers can do REG+OFFSET addressing, except
2815 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2816 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2817 if ((addr_mask != 0) && !indexed_only_p
2819 && (rc == RELOAD_REG_GPR
2820 || ((msize == 8 || m2 == SFmode)
2821 && (rc == RELOAD_REG_FPR
2822 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2823 addr_mask |= RELOAD_REG_OFFSET;
/* VSX registers can do REG+OFFSET addressing if ISA 3.0
2826 instructions are enabled. The offset for 128-bit VSX registers is
2827 only 12-bits. While GPRs can handle the full offset range, VSX
2828 registers can only handle the restricted range. */
2829 else if ((addr_mask != 0) && !indexed_only_p
2830 && msize == 16 && TARGET_P9_VECTOR
2831 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2832 || (m2 == TImode && TARGET_VSX)))
2834 addr_mask |= RELOAD_REG_OFFSET;
2835 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2836 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2839 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2840 addressing on 128-bit types. */
2841 if (rc == RELOAD_REG_VMX && msize == 16
2842 && (addr_mask & RELOAD_REG_VALID) != 0)
2843 addr_mask |= RELOAD_REG_AND_M16;
2845 reg_addr[m].addr_mask[rc] = addr_mask;
2846 any_addr_mask |= addr_mask;
2849 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2854 /* Initialize the various global tables that are based on register size. */
2856 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2862 /* Precalculate REGNO_REG_CLASS. */
2863 rs6000_regno_regclass[0] = GENERAL_REGS;
2864 for (r = 1; r < 32; ++r)
2865 rs6000_regno_regclass[r] = BASE_REGS;
2867 for (r = 32; r < 64; ++r)
2868 rs6000_regno_regclass[r] = FLOAT_REGS;
2870 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2871 rs6000_regno_regclass[r] = NO_REGS;
2873 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2874 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2876 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2877 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2878 rs6000_regno_regclass[r] = CR_REGS;
2880 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2881 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2882 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2883 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2884 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2885 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2886 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2888 /* Precalculate register class to simpler reload register class. We don't
2889 need all of the register classes that are combinations of different
2890 classes, just the simple ones that have constraint letters. */
2891 for (c = 0; c < N_REG_CLASSES; c++)
2892 reg_class_to_reg_type[c] = NO_REG_TYPE;
2894 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2895 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2896 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2897 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2898 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2899 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2900 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2901 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2902 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2903 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2907 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2908 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2912 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2913 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
/* Precalculate the valid memory formats as well as the vector information;
   this must be set up before the rs6000_hard_regno_nregs_internal calls
   below.  */
2919 gcc_assert ((int)VECTOR_NONE == 0);
2920 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2921 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2923 gcc_assert ((int)CODE_FOR_nothing == 0);
memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2926 gcc_assert ((int)NO_REGS == 0);
2927 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
/* The VSX hardware allows native alignment for vectors, but we control
   whether the compiler believes it can use native alignment or must still
   use 128-bit alignment.  */
2931 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2942 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2943 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2944 if (TARGET_FLOAT128_TYPE)
2946 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2947 rs6000_vector_align[KFmode] = 128;
2949 if (FLOAT128_IEEE_P (TFmode))
2951 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2952 rs6000_vector_align[TFmode] = 128;
2956 /* V2DF mode, VSX only. */
2959 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2960 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2961 rs6000_vector_align[V2DFmode] = align64;
2964 /* V4SF mode, either VSX or Altivec. */
2967 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2968 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2969 rs6000_vector_align[V4SFmode] = align32;
2971 else if (TARGET_ALTIVEC)
2973 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2974 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2975 rs6000_vector_align[V4SFmode] = align32;
/* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
   and stores.  */
2982 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2983 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2984 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2985 rs6000_vector_align[V4SImode] = align32;
2986 rs6000_vector_align[V8HImode] = align32;
2987 rs6000_vector_align[V16QImode] = align32;
2991 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2992 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2993 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2997 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2998 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2999 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3003 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3004 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3007 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3008 rs6000_vector_unit[V2DImode]
3009 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3010 rs6000_vector_align[V2DImode] = align64;
3012 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3013 rs6000_vector_unit[V1TImode]
3014 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3015 rs6000_vector_align[V1TImode] = 128;
3018 /* DFmode, see if we want to use the VSX unit. Memory is handled
3019 differently, so don't set rs6000_vector_mem. */
3022 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3023 rs6000_vector_align[DFmode] = 64;
3026 /* SFmode, see if we want to use the VSX unit. */
3027 if (TARGET_P8_VECTOR)
3029 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3030 rs6000_vector_align[SFmode] = 32;
3033 /* Allow TImode in VSX register and set the VSX memory macros. */
3036 rs6000_vector_mem[TImode] = VECTOR_VSX;
3037 rs6000_vector_align[TImode] = align64;
3040 /* Register class constraints for the constraints that depend on compile
3041 switches. When the VSX code was added, different constraints were added
3042 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
of the VSX registers are used.  The register classes for scalar floating
point types are set based on whether we allow that type into the upper
(Altivec) registers.  GCC has register classes to target the Altivec
registers for load/store operations, to select using a VSX memory
operation instead of the traditional floating point operation.  The
constraints are:
3050 d - Register class to use with traditional DFmode instructions.
3051 f - Register class to use with traditional SFmode instructions.
3052 v - Altivec register.
3053 wa - Any VSX register.
3054 wc - Reserved to represent individual CR bits (used in LLVM).
3055 wn - always NO_REGS.
3056 wr - GPR if 64-bit mode is permitted.
3057 wx - Float register if we can do 32-bit int stores. */
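/* For an illustrative use of these classes: an insn pattern whose operand
   uses the "wa" constraint can be allocated to either a traditional FPR
   or an Altivec register, letting one pattern (e.g. a VSX xxlor) cover
   the whole VSX register file.  */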
3059 if (TARGET_HARD_FLOAT)
3061 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3062 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3066 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3068 /* Add conditional constraints based on various options, to allow us to
3069 collapse multiple insn patterns. */
3071 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3073 if (TARGET_POWERPC64)
3075 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3076 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3080 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
/* Support for new direct moves (ISA 3.0 + 64-bit).  */
3083 if (TARGET_DIRECT_MOVE_128)
3084 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3086 /* Set up the reload helper and direct move functions. */
3087 if (TARGET_VSX || TARGET_ALTIVEC)
3091 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3092 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3093 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3094 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3095 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3096 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3097 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3098 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3099 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3100 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3101 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3102 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3103 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3104 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3105 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3106 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3107 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3108 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3109 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3110 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3112 if (FLOAT128_VECTOR_P (KFmode))
3114 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3115 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3118 if (FLOAT128_VECTOR_P (TFmode))
3120 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3121 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
/* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
   available.  */
3126 if (TARGET_NO_SDMODE_STACK)
3128 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3129 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3134 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3135 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3138 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3140 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3141 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3142 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3143 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3144 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3145 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3146 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3147 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3148 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3150 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3151 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3152 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3153 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3154 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3155 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3156 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3157 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3158 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3160 if (FLOAT128_VECTOR_P (KFmode))
3162 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3163 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3166 if (FLOAT128_VECTOR_P (TFmode))
3168 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3169 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3175 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3176 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3177 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3178 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3179 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3180 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3181 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3182 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3183 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3184 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3185 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3186 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3187 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3188 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3189 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3190 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3191 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3192 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3193 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3194 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3196 if (FLOAT128_VECTOR_P (KFmode))
3198 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3199 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3202 if (FLOAT128_IEEE_P (TFmode))
3204 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3205 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
/* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
   available.  */
3210 if (TARGET_NO_SDMODE_STACK)
3212 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3213 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3218 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3219 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3222 if (TARGET_DIRECT_MOVE)
3224 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3225 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3226 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3230 reg_addr[DFmode].scalar_in_vmx_p = true;
3231 reg_addr[DImode].scalar_in_vmx_p = true;
3233 if (TARGET_P8_VECTOR)
3235 reg_addr[SFmode].scalar_in_vmx_p = true;
3236 reg_addr[SImode].scalar_in_vmx_p = true;
3238 if (TARGET_P9_VECTOR)
3240 reg_addr[HImode].scalar_in_vmx_p = true;
3241 reg_addr[QImode].scalar_in_vmx_p = true;
3246 /* Precalculate HARD_REGNO_NREGS. */
3247 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3248 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3249 rs6000_hard_regno_nregs[m][r]
3250 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3252 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3253 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3254 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3255 rs6000_hard_regno_mode_ok_p[m][r]
3256 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3258 /* Precalculate CLASS_MAX_NREGS sizes. */
3259 for (c = 0; c < LIM_REG_CLASSES; ++c)
3263 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3264 reg_size = UNITS_PER_VSX_WORD;
3266 else if (c == ALTIVEC_REGS)
3267 reg_size = UNITS_PER_ALTIVEC_WORD;
3269 else if (c == FLOAT_REGS)
3270 reg_size = UNITS_PER_FP_WORD;
3273 reg_size = UNITS_PER_WORD;
3275 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3277 machine_mode m2 = (machine_mode)m;
3278 int reg_size2 = reg_size;
/* TDmode & IBM 128-bit floating point always takes 2 registers, even
   in VSX registers.  */
3282 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3283 reg_size2 = UNITS_PER_FP_WORD;
3285 rs6000_class_max_nregs[m][c]
3286 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
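/* A worked example of the computation above: with VSX enabled, V2DFmode
   needs 16 / 16 == 1 register in a VSX class, while IFmode (IBM extended
   double) still needs 16 / 8 == 2 because FLOAT128_2REG_P forces
   reg_size2 back to UNITS_PER_FP_WORD.  */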
/* Calculate which modes to automatically generate code to use the
   reciprocal divide and square root instructions.  In the future, possibly
   automatically generate the instructions even if the user did not specify
   -mrecip.  The older machines' double precision reciprocal sqrt estimate is
   not accurate enough.  */
3295 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3297 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3299 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3300 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3301 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3302 if (VECTOR_UNIT_VSX_P (V2DFmode))
3303 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3305 if (TARGET_FRSQRTES)
3306 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3308 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3309 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3310 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3311 if (VECTOR_UNIT_VSX_P (V2DFmode))
3312 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3314 if (rs6000_recip_control)
3316 if (!flag_finite_math_only)
3317 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3319 if (flag_trapping_math)
3320 warning (0, "%qs requires %qs or %qs", "-mrecip",
3321 "-fno-trapping-math", "-ffast-math");
3322 if (!flag_reciprocal_math)
3323 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3325 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3327 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3328 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3329 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3331 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3332 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3333 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3335 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3336 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3337 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3339 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3340 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3341 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3343 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3344 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3345 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3347 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3348 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3349 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3351 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3352 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3353 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3355 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3356 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3357 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
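/* As a sketch of the effect: with -mrecip -ffast-math on a machine with
   fres, an SFmode division x / y can be expanded as x times a reciprocal
   estimate of y, refined by a Newton-Raphson step, instead of a
   full-latency fdivs.  */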
/* Update the addr mask bits in reg_addr to help secondary reload and the
   legitimate address support figure out the appropriate addressing to
   use.  */
3364 rs6000_setup_reg_addr_masks ();
3366 if (global_init_p || TARGET_DEBUG_TARGET)
3368 if (TARGET_DEBUG_REG)
3369 rs6000_debug_reg_global ();
3371 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3373 "SImode variable mult cost = %d\n"
3374 "SImode constant mult cost = %d\n"
3375 "SImode short constant mult cost = %d\n"
3376 "DImode multipliciation cost = %d\n"
3377 "SImode division cost = %d\n"
3378 "DImode division cost = %d\n"
3379 "Simple fp operation cost = %d\n"
3380 "DFmode multiplication cost = %d\n"
3381 "SFmode division cost = %d\n"
3382 "DFmode division cost = %d\n"
3383 "cache line size = %d\n"
3384 "l1 cache size = %d\n"
3385 "l2 cache size = %d\n"
3386 "simultaneous prefetches = %d\n"
3389 rs6000_cost->mulsi_const,
3390 rs6000_cost->mulsi_const9,
3398 rs6000_cost->cache_line_size,
3399 rs6000_cost->l1_cache_size,
3400 rs6000_cost->l2_cache_size,
3401 rs6000_cost->simultaneous_prefetches);
3406 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3409 darwin_rs6000_override_options (void)
/* The Darwin ABI always includes AltiVec; it can't be (validly) turned
   off.  */
3413 rs6000_altivec_abi = 1;
3414 TARGET_ALTIVEC_VRSAVE = 1;
3415 rs6000_current_abi = ABI_DARWIN;
3417 if (DEFAULT_ABI == ABI_DARWIN
3419 darwin_one_byte_bool = 1;
3421 if (TARGET_64BIT && ! TARGET_POWERPC64)
3423 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3424 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3428 rs6000_default_long_calls = 1;
3429 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
/* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
   AltiVec.  */
3434 if (!flag_mkernel && !flag_apple_kext
3436 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3437 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
/* Unless the user (not the configurer) has explicitly overridden
   it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
   G4 unless targeting the kernel.  */
3444 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3445 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3446 && ! global_options_set.x_rs6000_cpu_index)
3448 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
/* If not otherwise specified by a target, make 'long double' equivalent to
   'double'.  */
3456 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3457 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3460 /* Return the builtin mask of the various options used that could affect which
3461 builtins were used. In the past we used target_flags, but we've run out of
3462 bits, and some options are no longer in target_flags. */
3465 rs6000_builtin_mask_calculate (void)
3467 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3468 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3469 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3470 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3471 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3472 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3473 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3474 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3475 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3476 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3477 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3478 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3479 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3480 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3481 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3482 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3483 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3484 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3485 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3486 | ((TARGET_LONG_DOUBLE_128
3487 && TARGET_HARD_FLOAT
3488 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3489 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3490 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
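/* For example (illustrative; the exact bits depend on the options in
   force), a -mcpu=power8 compilation typically yields a mask containing
   RS6000_BTM_ALTIVEC | RS6000_BTM_VSX | RS6000_BTM_P8_VECTOR along with
   the hard-float and 64-bit bits.  */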
3493 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3494 to clobber the XER[CA] bit because clobbering that bit without telling
3495 the compiler worked just fine with versions of GCC before GCC 5, and
3496 breaking a lot of older code in ways that are hard to track down is
3497 not such a great idea. */
3500 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3501 vec<const char *> &/*constraints*/,
3502 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3504 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3505 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
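/* For example (illustrative, not from the original sources), user code
   such as

     asm ("addic %0,%1,1" : "=r" (x) : "r" (y));

   modifies XER[CA] without declaring it; the implicit clobber added
   above keeps such pre-GCC 5 code correct.  */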
3509 /* Override command line options.
3511 Combine build-specific configuration information with options
3512 specified on the command line to set various state variables which
3513 influence code generation, optimization, and expansion of built-in
3514 functions. Assure that command-line configuration preferences are
3515 compatible with each other and with the build configuration; issue
3516 warnings while adjusting configuration or error messages while
3517 rejecting configuration.
3519 Upon entry to this function:
3521 This function is called once at the beginning of
3522 compilation, and then again at the start and end of compiling
3523 each section of code that has a different configuration, as
3524 indicated, for example, by adding the
3526 __attribute__((__target__("cpu=power9")))
3528 qualifier to a function definition or, for example, by bracketing
3531 #pragma GCC target("altivec")
3535 #pragma GCC reset_options
3537 directives. Parameter global_init_p is true for the initial
3538 invocation, which initializes global variables, and false for all
3539 subsequent invocations.
3542 Various global state information is assumed to be valid. This
3543 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3544 default CPU specified at build configure time, TARGET_DEFAULT,
3545 representing the default set of option flags for the default
3546 target, and global_options_set.x_rs6000_isa_flags, representing
3547 which options were requested on the command line.
3549 Upon return from this function:
3551 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3552 was set by name on the command line. Additionally, if certain
3553 attributes are automatically enabled or disabled by this function
3554 in order to assure compatibility between options and
3555 configuration, the flags associated with those attributes are
3556 also set. By setting these "explicit bits", we avoid the risk
3557 that other code might accidentally overwrite these particular
3558 attributes with "default values".
3560 The various bits of rs6000_isa_flags are set to indicate the
3561 target options that have been selected for the most current
3562 compilation efforts. This has the effect of also turning on the
3563 associated TARGET_XXX values since these are macros which are
3564 generally defined to test the corresponding bit of the
3565 rs6000_isa_flags variable.
3567 The variable rs6000_builtin_mask is set to represent the target
3568 options for the most current compilation efforts, consistent with
3569 the current contents of rs6000_isa_flags. This variable controls
3570 expansion of built-in functions.
3572 Various other global variables and fields of global structures
3573 (over 50 in all) are initialized to reflect the desired options
3574 for the most current compilation efforts. */
3577 rs6000_option_override_internal (bool global_init_p)
3581 HOST_WIDE_INT set_masks;
3582 HOST_WIDE_INT ignore_masks;
3585 struct cl_target_option *main_target_opt
3586 = ((global_init_p || target_option_default_node == NULL)
3587 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3589 /* Print defaults. */
3590 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3591 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3593 /* Remember the explicit arguments. */
3595 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
/* On 64-bit Darwin, power alignment is ABI-incompatible with some C
   library functions, so warn about it.  The flag may be useful for
   performance studies from time to time though, so don't disable it
   entirely.  */
3601 if (global_options_set.x_rs6000_alignment_flags
3602 && rs6000_alignment_flags == MASK_ALIGN_POWER
3603 && DEFAULT_ABI == ABI_DARWIN
3605 warning (0, "%qs is not supported for 64-bit Darwin;"
3606 " it is incompatible with the installed C and C++ libraries",
/* Numerous experiments show that IRA-based loop pressure
   calculation works better for RTL loop invariant motion on targets
   with enough (>= 32) registers.  It is an expensive optimization,
   so it is on only for peak performance.  */
3613 if (optimize >= 3 && global_init_p
3614 && !global_options_set.x_flag_ira_loop_pressure)
3615 flag_ira_loop_pressure = 1;
3617 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3618 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3619 options were already specified. */
3620 if (flag_sanitize & SANITIZE_USER_ADDRESS
3621 && !global_options_set.x_flag_asynchronous_unwind_tables)
3622 flag_asynchronous_unwind_tables = 1;
3624 /* Set the pointer size. */
3627 rs6000_pmode = DImode;
3628 rs6000_pointer_size = 64;
3632 rs6000_pmode = SImode;
3633 rs6000_pointer_size = 32;
3636 /* Some OSs don't support saving the high part of 64-bit registers on context
3637 switch. Other OSs don't support saving Altivec registers. On those OSs,
3638 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3639 if the user wants either, the user must explicitly specify them and we
3640 won't interfere with the user's specification. */
3642 set_masks = POWERPC_MASKS;
3643 #ifdef OS_MISSING_POWERPC64
3644 if (OS_MISSING_POWERPC64)
3645 set_masks &= ~OPTION_MASK_POWERPC64;
3647 #ifdef OS_MISSING_ALTIVEC
3648 if (OS_MISSING_ALTIVEC)
3649 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3650 | OTHER_VSX_VECTOR_MASKS);
3653 /* Don't override by the processor default if given explicitly. */
3654 set_masks &= ~rs6000_isa_flags_explicit;
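/* For example, if the user passed -mno-vsx, OPTION_MASK_VSX is already
   in rs6000_isa_flags_explicit and is cleared from set_masks here, so a
   later -mcpu default cannot silently re-enable VSX.  */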
3656 if (global_init_p && rs6000_dejagnu_cpu_index >= 0)
3657 rs6000_cpu_index = rs6000_dejagnu_cpu_index;
3659 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3660 the cpu in a target attribute or pragma, but did not specify a tuning
3661 option, use the cpu for the tuning option rather than the option specified
3662 with -mtune on the command line. Process a '--with-cpu' configuration
3663 request as an implicit --cpu. */
3664 if (rs6000_cpu_index >= 0)
3665 cpu_index = rs6000_cpu_index;
3666 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3667 cpu_index = main_target_opt->x_rs6000_cpu_index;
3668 else if (OPTION_TARGET_CPU_DEFAULT)
3669 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3671 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3672 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3673 with those from the cpu, except for options that were explicitly set. If
3674 we don't have a cpu, do not override the target bits set in
3678 rs6000_cpu_index = cpu_index;
3679 rs6000_isa_flags &= ~set_masks;
3680 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3685 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3686 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3687 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3688 to using rs6000_isa_flags, we need to do the initialization here.
3690 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3691 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3692 HOST_WIDE_INT flags;
3694 flags = TARGET_DEFAULT;
3697 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3698 const char *default_cpu = (!TARGET_POWERPC64
3703 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3704 flags = processor_target_table[default_cpu_index].target_enable;
3706 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
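/* Worked example (illustrative, not from the original sources): with no
   -mcpu and no configured default cpu, FLAGS here comes from TARGET_DEFAULT,
   or failing that from the powerpc/powerpc64/powerpc64le table entry.  If
   the user also passed -mno-vsx, OPTION_MASK_VSX is recorded in
   rs6000_isa_flags_explicit, so "flags & ~rs6000_isa_flags_explicit" adopts
   every default ISA bit except the explicitly disabled VSX bit.  */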
3709 if (rs6000_tune_index >= 0)
3710 tune_index = rs6000_tune_index;
3711 else if (cpu_index >= 0)
3712 rs6000_tune_index = tune_index = cpu_index;
3716 enum processor_type tune_proc
3717 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3720 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3721 if (processor_target_table[i].processor == tune_proc)
3729 rs6000_cpu = processor_target_table[cpu_index].processor;
3731 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3733 gcc_assert (tune_index >= 0);
3734 rs6000_tune = processor_target_table[tune_index].processor;
3736 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3737 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3738 || rs6000_cpu == PROCESSOR_PPCE5500)
3741 error ("AltiVec not supported in this target");
3744 /* If we are optimizing big endian systems for space, use the load/store
3745 multiple instructions. */
3746 if (BYTES_BIG_ENDIAN && optimize_size)
3747 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
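/* (Background note, illustrative: the "multiple" instructions are lmw/stmw,
   which move a run of consecutive gprs to or from memory in a single
   instruction, e.g. "stmw r25,-28(r1)" stores r25..r31.)  */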
3749 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3750 because the hardware doesn't support the instructions used in little
3751 endian mode, which causes an alignment trap. The 750 does not cause an
3752 alignment trap (except when the target is unaligned). */
3754 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3756 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3757 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3758 warning (0, "%qs is not supported on little endian systems",
3762 /* If little-endian, default to -mstrict-align on older processors.
3763 Testing for htm matches power8 and later. */
3764 if (!BYTES_BIG_ENDIAN
3765 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3766 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3768 if (!rs6000_fold_gimple)
3770 "gimple folding of rs6000 builtins has been disabled.\n");
3772 /* Add some warnings for VSX. */
3775 const char *msg = NULL;
3776 if (!TARGET_HARD_FLOAT)
3778 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3779 msg = N_("%<-mvsx%> requires hardware floating point");
3782 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3783 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3786 else if (TARGET_AVOID_XFORM > 0)
3787 msg = N_("%<-mvsx%> needs indexed addressing");
3788 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3789 & OPTION_MASK_ALTIVEC))
3791 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3792 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3794 msg = N_("%<-mno-altivec%> disables vsx");
3800 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3801 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3805 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3806 the -mcpu setting to enable options that conflict. */
3807 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3808 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3809 | OPTION_MASK_ALTIVEC
3810 | OPTION_MASK_VSX)) != 0)
3811 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3812 | OPTION_MASK_DIRECT_MOVE)
3813 & ~rs6000_isa_flags_explicit);
3815 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3816 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3818 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3819 off all of the options that depend on those flags. */
3820 ignore_masks = rs6000_disable_incompatible_switches ();
3822 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3823 unless the user explicitly used the -mno-<option> to disable the code. */
3824 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3825 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3826 else if (TARGET_P9_MINMAX)
3830 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3832 /* Legacy behavior: allow -mcpu=power9 with certain
3833 capabilities explicitly disabled. */
3834 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3837 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3838 "for <xxx> less than power9", "-mcpu");
3840 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3841 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3842 & rs6000_isa_flags_explicit))
3843 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3844 were explicitly cleared. */
3845 error ("%qs incompatible with explicitly disabled options",
3848 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3850 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3851 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3852 else if (TARGET_VSX)
3853 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3854 else if (TARGET_POPCNTD)
3855 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3856 else if (TARGET_DFP)
3857 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3858 else if (TARGET_CMPB)
3859 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3860 else if (TARGET_FPRND)
3861 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3862 else if (TARGET_POPCNTB)
3863 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3864 else if (TARGET_ALTIVEC)
3865 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3867 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3869 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3870 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3871 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3874 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3876 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3877 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3878 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3881 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3883 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3884 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3885 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3888 if (TARGET_P8_VECTOR && !TARGET_VSX)
3890 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3891 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3892 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3893 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3895 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3896 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3897 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3901 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3903 rs6000_isa_flags |= OPTION_MASK_VSX;
3904 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3908 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3910 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3911 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3912 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3915 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3916 silently turn off quad memory mode. */
3917 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3919 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3920 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3922 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3923 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3925 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3926 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3929 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3930 the words are reversed, but atomic operations can still be done by
3931 swapping the words. */
3932 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3934 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3935 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3938 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3941 /* Assume if the user asked for normal quad memory instructions, they want
3942 the atomic versions as well, unless they explicitly told us not to use quad
3943 word atomic instructions. */
3944 if (TARGET_QUAD_MEMORY
3945 && !TARGET_QUAD_MEMORY_ATOMIC
3946 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3947 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3949 /* If we can shrink-wrap the TOC register save separately, then use
3950 -msave-toc-indirect unless explicitly disabled. */
3951 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3952 && flag_shrink_wrap_separate
3953 && optimize_function_for_speed_p (cfun))
3954 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3956 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3957 generating power8 instructions. Power9 does not optimize power8 fusion
3959 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3961 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3962 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3964 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3967 /* Setting additional fusion flags turns on base fusion. */
3968 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3970 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3972 if (TARGET_P8_FUSION_SIGN)
3973 error ("%qs requires %qs", "-mpower8-fusion-sign",
3976 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3979 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3982 /* Power8 does not fuse sign-extended loads with the addis. If we are
3983 optimizing at high levels for speed, convert a sign-extended load into a
3984 zero-extending load and an explicit sign extension. */
3985 if (TARGET_P8_FUSION
3986 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3987 && optimize_function_for_speed_p (cfun)
3989 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3991 /* ISA 3.0 vector instructions include ISA 2.07. */
3992 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3994 /* We prefer to not mention undocumented options in
3995 error messages. However, if users have managed to select
3996 power9-vector without selecting power8-vector, they
3997 already know about undocumented flags. */
3998 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
3999 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4000 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4001 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4003 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4004 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4005 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4009 /* OPTION_MASK_P9_VECTOR is explicit and
4010 OPTION_MASK_P8_VECTOR is not explicit. */
4011 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4012 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4016 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4017 support. If we only have ISA 2.06 support, and the user did not specify
4018 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4019 but we don't enable the full vectorization support. */
4020 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4021 TARGET_ALLOW_MOVMISALIGN = 1;
4023 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4025 if (TARGET_ALLOW_MOVMISALIGN > 0
4026 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4027 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4029 TARGET_ALLOW_MOVMISALIGN = 0;
4032 /* Determine when unaligned vector accesses are permitted, and when
4033 they are preferred over masked Altivec loads. Note that if
4034 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4035 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4037 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4041 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4042 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4044 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4047 else if (!TARGET_ALLOW_MOVMISALIGN)
4049 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4050 error ("%qs requires %qs", "-mefficient-unaligned-vsx",
4051 "-mallow-movmisalign");
4053 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4057 /* Use long double size to select the appropriate long double. We use
4058 TYPE_PRECISION to differentiate the 3 different long double types. We map
4059 128 into the precision used for TFmode. */
4060 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4062 : FLOAT_PRECISION_TFmode);
4064 /* Set long double size before the IEEE 128-bit tests. */
4065 if (!global_options_set.x_rs6000_long_double_type_size)
4067 if (main_target_opt != NULL
4068 && (main_target_opt->x_rs6000_long_double_type_size
4069 != default_long_double_size))
4070 error ("target attribute or pragma changes %<long double%> size");
4072 rs6000_long_double_type_size = default_long_double_size;
4074 else if (rs6000_long_double_type_size == 128)
4075 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4076 else if (global_options_set.x_rs6000_ieeequad)
4078 if (global_options.x_rs6000_ieeequad)
4079 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4081 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
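/* Example of the combination rejected just above (illustrative):
     gcc -mlong-double-64 -mabi=ieeelongdouble
   is diagnosed, since a 64-bit long double leaves no IBM/IEEE 128-bit
   format to choose between.  */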
4084 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4085 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4086 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4087 those systems will not pick up this default. Warn if the user changes the
4088 default unless -Wno-psabi. */
4089 if (!global_options_set.x_rs6000_ieeequad)
4090 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4094 if (global_options.x_rs6000_ieeequad
4095 && (!TARGET_POPCNTD || !TARGET_VSX))
4096 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4098 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4100 static bool warned_change_long_double;
4101 if (!warned_change_long_double)
4103 warned_change_long_double = true;
4104 if (TARGET_IEEEQUAD)
4105 warning (OPT_Wpsabi, "Using IEEE extended precision "
4108 warning (OPT_Wpsabi, "Using IBM extended precision "
4114 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4115 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4116 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4117 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4118 the keyword as well as the type. */
4119 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4121 /* IEEE 128-bit floating point requires VSX support. */
4122 if (TARGET_FLOAT128_KEYWORD)
4126 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4127 error ("%qs requires VSX support", "-mfloat128");
4129 TARGET_FLOAT128_TYPE = 0;
4130 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4131 | OPTION_MASK_FLOAT128_HW);
4133 else if (!TARGET_FLOAT128_TYPE)
4135 TARGET_FLOAT128_TYPE = 1;
4136 warning (0, "The %<-mfloat128%> option may not be fully supported");
4140 /* Enable the __float128 keyword under Linux by default. */
4141 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4142 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4143 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4145 /* If we are supporting the float128 type and have full ISA 3.0 support,
4146 enable -mfloat128-hardware by default. However, don't enable the
4147 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4148 because sometimes the compiler wants to put things in an integer
4149 container, and if we don't have __int128 support, it is impossible. */
4150 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4151 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4152 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4153 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4155 if (TARGET_FLOAT128_HW
4156 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4158 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4159 error ("%qs requires full ISA 3.0 support", "-mfloat128-hardware");
4161 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4164 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4166 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4167 error ("%qs requires %qs", "-mfloat128-hardware", "-m64");
4169 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4172 /* -mprefixed-addr (and hence -mpcrel) requires -mcpu=future. */
4173 if (TARGET_PREFIXED_ADDR && !TARGET_FUTURE)
4175 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4176 error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
4177 else if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED_ADDR) != 0)
4178 error ("%qs requires %qs", "-mprefixed-addr", "-mcpu=future");
4180 rs6000_isa_flags &= ~(OPTION_MASK_PCREL | OPTION_MASK_PREFIXED_ADDR);
4183 /* -mpcrel requires prefixed load/store addressing. */
4184 if (TARGET_PCREL && !TARGET_PREFIXED_ADDR)
4186 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4187 error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
4189 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4192 /* Print the options after updating the defaults. */
4193 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4194 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4196 /* E500mc does "better" if we inline more aggressively. Respect the
4197 user's opinion, though. */
4198 if (rs6000_block_move_inline_limit == 0
4199 && (rs6000_tune == PROCESSOR_PPCE500MC
4200 || rs6000_tune == PROCESSOR_PPCE500MC64
4201 || rs6000_tune == PROCESSOR_PPCE5500
4202 || rs6000_tune == PROCESSOR_PPCE6500))
4203 rs6000_block_move_inline_limit = 128;
4205 /* store_one_arg depends on expand_block_move to handle at least the
4206 size of reg_parm_stack_space. */
4207 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4208 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4212 /* If the appropriate debug option is enabled, replace the target hooks
4213 with debug versions that call the real version and then print
4214 debugging information. */
4215 if (TARGET_DEBUG_COST)
4217 targetm.rtx_costs = rs6000_debug_rtx_costs;
4218 targetm.address_cost = rs6000_debug_address_cost;
4219 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4222 if (TARGET_DEBUG_ADDR)
4224 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4225 targetm.legitimize_address = rs6000_debug_legitimize_address;
4226 rs6000_secondary_reload_class_ptr
4227 = rs6000_debug_secondary_reload_class;
4228 targetm.secondary_memory_needed
4229 = rs6000_debug_secondary_memory_needed;
4230 targetm.can_change_mode_class
4231 = rs6000_debug_can_change_mode_class;
4232 rs6000_preferred_reload_class_ptr
4233 = rs6000_debug_preferred_reload_class;
4234 rs6000_mode_dependent_address_ptr
4235 = rs6000_debug_mode_dependent_address;
4238 if (rs6000_veclibabi_name)
4240 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4241 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4244 error ("unknown vectorization library ABI type (%qs) for "
4245 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4251 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4252 target attribute or pragma, which automatically enables both options,
4253 unless the altivec ABI was set. This is set by default for 64-bit, but
4255 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4257 TARGET_FLOAT128_TYPE = 0;
4258 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4259 | OPTION_MASK_FLOAT128_KEYWORD)
4260 & ~rs6000_isa_flags_explicit);
4263 /* Enable Altivec ABI for AIX -maltivec. */
4264 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4266 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4267 error ("target attribute or pragma changes AltiVec ABI");
4269 rs6000_altivec_abi = 1;
4272 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4273 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4274 be explicitly overridden in either case. */
4277 if (!global_options_set.x_rs6000_altivec_abi
4278 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4280 if (main_target_opt != NULL
4281 && !main_target_opt->x_rs6000_altivec_abi)
4282 error ("target attribute or pragma changes AltiVec ABI");
4284 rs6000_altivec_abi = 1;
4288 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4289 So far, the only darwin64 targets are also MACH-O. */
4291 && DEFAULT_ABI == ABI_DARWIN
4294 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4295 error ("target attribute or pragma changes darwin64 ABI");
4298 rs6000_darwin64_abi = 1;
4299 /* Default to natural alignment, for better performance. */
4300 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4304 /* Place FP constants in the constant pool instead of TOC
4305 if section anchors are enabled. */
4306 if (flag_section_anchors
4307 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4308 TARGET_NO_FP_IN_TOC = 1;
4310 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4311 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4313 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4314 SUBTARGET_OVERRIDE_OPTIONS;
4316 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4317 SUBSUBTARGET_OVERRIDE_OPTIONS;
4319 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4320 SUB3TARGET_OVERRIDE_OPTIONS;
4323 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4324 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4326 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4327 && rs6000_tune != PROCESSOR_POWER5
4328 && rs6000_tune != PROCESSOR_POWER6
4329 && rs6000_tune != PROCESSOR_POWER7
4330 && rs6000_tune != PROCESSOR_POWER8
4331 && rs6000_tune != PROCESSOR_POWER9
4332 && rs6000_tune != PROCESSOR_FUTURE
4333 && rs6000_tune != PROCESSOR_PPCA2
4334 && rs6000_tune != PROCESSOR_CELL
4335 && rs6000_tune != PROCESSOR_PPC476);
4336 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4337 || rs6000_tune == PROCESSOR_POWER5
4338 || rs6000_tune == PROCESSOR_POWER7
4339 || rs6000_tune == PROCESSOR_POWER8);
4340 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4341 || rs6000_tune == PROCESSOR_POWER5
4342 || rs6000_tune == PROCESSOR_POWER6
4343 || rs6000_tune == PROCESSOR_POWER7
4344 || rs6000_tune == PROCESSOR_POWER8
4345 || rs6000_tune == PROCESSOR_POWER9
4346 || rs6000_tune == PROCESSOR_FUTURE
4347 || rs6000_tune == PROCESSOR_PPCE500MC
4348 || rs6000_tune == PROCESSOR_PPCE500MC64
4349 || rs6000_tune == PROCESSOR_PPCE5500
4350 || rs6000_tune == PROCESSOR_PPCE6500);
4352 /* Allow debug switches to override the above settings. These are set to -1
4353 in rs6000.opt to indicate the user hasn't directly set the switch. */
4354 if (TARGET_ALWAYS_HINT >= 0)
4355 rs6000_always_hint = TARGET_ALWAYS_HINT;
4357 if (TARGET_SCHED_GROUPS >= 0)
4358 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4360 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4361 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4363 rs6000_sched_restricted_insns_priority
4364 = (rs6000_sched_groups ? 1 : 0);
4366 /* Handle -msched-costly-dep option. */
4367 rs6000_sched_costly_dep
4368 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4370 if (rs6000_sched_costly_dep_str)
4372 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4373 rs6000_sched_costly_dep = no_dep_costly;
4374 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4375 rs6000_sched_costly_dep = all_deps_costly;
4376 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4377 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4378 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4379 rs6000_sched_costly_dep = store_to_load_dep_costly;
4381 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4382 atoi (rs6000_sched_costly_dep_str));
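/* Usage sketch (illustrative): -msched-costly-dep=true_store_to_load
   selects the named heuristic above, while a numeric argument such as
   -msched-costly-dep=20 is read by the atoi fallback and treats any
   dependence whose latency is at least that value as costly.  */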
4385 /* Handle -minsert-sched-nops option. */
4386 rs6000_sched_insert_nops
4387 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4389 if (rs6000_sched_insert_nops_str)
4391 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4392 rs6000_sched_insert_nops = sched_finish_none;
4393 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4394 rs6000_sched_insert_nops = sched_finish_pad_groups;
4395 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4396 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4398 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4399 atoi (rs6000_sched_insert_nops_str));
4402 /* Handle the stack protector. */
4403 if (!global_options_set.x_rs6000_stack_protector_guard)
4404 #ifdef TARGET_THREAD_SSP_OFFSET
4405 rs6000_stack_protector_guard = SSP_TLS;
4407 rs6000_stack_protector_guard = SSP_GLOBAL;
4410 #ifdef TARGET_THREAD_SSP_OFFSET
4411 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4412 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4415 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4418 const char *str = rs6000_stack_protector_guard_offset_str;
4421 long offset = strtol (str, &endp, 0);
4422 if (!*str || *endp || errno)
4423 error ("%qs is not a valid number in %qs", str,
4424 "-mstack-protector-guard-offset=");
4426 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4427 || (TARGET_64BIT && (offset & 3)))
4428 error ("%qs is not a valid offset in %qs", str,
4429 "-mstack-protector-guard-offset=");
4431 rs6000_stack_protector_guard_offset = offset;
4434 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4436 const char *str = rs6000_stack_protector_guard_reg_str;
4437 int reg = decode_reg_name (str);
4439 if (!IN_RANGE (reg, 1, 31))
4440 error ("%qs is not a valid base register in %qs", str,
4441 "-mstack-protector-guard-reg=");
4443 rs6000_stack_protector_guard_reg = reg;
4446 if (rs6000_stack_protector_guard == SSP_TLS
4447 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4448 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
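/* Usage sketch (illustrative): the TLS default above corresponds roughly
   to the explicit options
     -mstack-protector-guard=tls -mstack-protector-guard-reg=r13
     -mstack-protector-guard-offset=<off>
   (r2 instead of r13 for 32-bit), where <off> must fit in a signed 16-bit
   displacement and be word-aligned in 64-bit mode, as checked above.  */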
4452 #ifdef TARGET_REGNAMES
4453 /* If the user desires alternate register names, copy in the
4454 alternate names now. */
4455 if (TARGET_REGNAMES)
4456 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4459 /* Set aix_struct_return last, after the ABI is determined.
4460 If -maix-struct-return or -msvr4-struct-return was explicitly
4461 used, don't override with the ABI default. */
4462 if (!global_options_set.x_aix_struct_return)
4463 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4466 /* IBM XL compiler defaults to unsigned bitfields. */
4467 if (TARGET_XL_COMPAT)
4468 flag_signed_bitfields = 0;
4471 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4472 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4474 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4476 /* We can only guarantee the availability of DI pseudo-ops when
4477 assembling for 64-bit targets. */
4480 targetm.asm_out.aligned_op.di = NULL;
4481 targetm.asm_out.unaligned_op.di = NULL;
4485 /* Set branch target alignment, if not optimizing for size. */
4488 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4489 aligned 8-byte to avoid misprediction by the branch predictor. */
4490 if (rs6000_tune == PROCESSOR_TITAN
4491 || rs6000_tune == PROCESSOR_CELL)
4493 if (flag_align_functions && !str_align_functions)
4494 str_align_functions = "8";
4495 if (flag_align_jumps && !str_align_jumps)
4496 str_align_jumps = "8";
4497 if (flag_align_loops && !str_align_loops)
4498 str_align_loops = "8";
4500 if (rs6000_align_branch_targets)
4502 if (flag_align_functions && !str_align_functions)
4503 str_align_functions = "16";
4504 if (flag_align_jumps && !str_align_jumps)
4505 str_align_jumps = "16";
4506 if (flag_align_loops && !str_align_loops)
4508 can_override_loop_align = 1;
4509 str_align_loops = "16";
4513 if (flag_align_jumps && !str_align_jumps)
4514 str_align_jumps = "16";
4515 if (flag_align_loops && !str_align_loops)
4516 str_align_loops = "16";
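/* Net effect (illustrative): when tuning for a branch-target-aligning cpu
   such as power8, leaving -falign-* unspecified behaves roughly like
   -falign-functions=16 -falign-jumps=16 -falign-loops=16, with loop
   alignment still overridable by rs6000_loop_align below.  */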
4519 /* Arrange to save and restore machine status around nested functions. */
4520 init_machine_status = rs6000_init_machine_status;
4522 /* We should always be splitting complex arguments, but we can't break
4523 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4524 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4525 targetm.calls.split_complex_arg = NULL;
4527 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4528 if (DEFAULT_ABI == ABI_AIX)
4529 targetm.calls.custom_function_descriptors = 0;
4532 /* Initialize rs6000_cost with the appropriate target costs. */
4534 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4536 switch (rs6000_tune)
4538 case PROCESSOR_RS64A:
4539 rs6000_cost = &rs64a_cost;
4542 case PROCESSOR_MPCCORE:
4543 rs6000_cost = &mpccore_cost;
4546 case PROCESSOR_PPC403:
4547 rs6000_cost = &ppc403_cost;
4550 case PROCESSOR_PPC405:
4551 rs6000_cost = &ppc405_cost;
4554 case PROCESSOR_PPC440:
4555 rs6000_cost = &ppc440_cost;
4558 case PROCESSOR_PPC476:
4559 rs6000_cost = &ppc476_cost;
4562 case PROCESSOR_PPC601:
4563 rs6000_cost = &ppc601_cost;
4566 case PROCESSOR_PPC603:
4567 rs6000_cost = &ppc603_cost;
4570 case PROCESSOR_PPC604:
4571 rs6000_cost = &ppc604_cost;
4574 case PROCESSOR_PPC604e:
4575 rs6000_cost = &ppc604e_cost;
4578 case PROCESSOR_PPC620:
4579 rs6000_cost = &ppc620_cost;
4582 case PROCESSOR_PPC630:
4583 rs6000_cost = &ppc630_cost;
4586 case PROCESSOR_CELL:
4587 rs6000_cost = &ppccell_cost;
4590 case PROCESSOR_PPC750:
4591 case PROCESSOR_PPC7400:
4592 rs6000_cost = &ppc750_cost;
4595 case PROCESSOR_PPC7450:
4596 rs6000_cost = &ppc7450_cost;
4599 case PROCESSOR_PPC8540:
4600 case PROCESSOR_PPC8548:
4601 rs6000_cost = &ppc8540_cost;
4604 case PROCESSOR_PPCE300C2:
4605 case PROCESSOR_PPCE300C3:
4606 rs6000_cost = &ppce300c2c3_cost;
4609 case PROCESSOR_PPCE500MC:
4610 rs6000_cost = &ppce500mc_cost;
4613 case PROCESSOR_PPCE500MC64:
4614 rs6000_cost = &ppce500mc64_cost;
4617 case PROCESSOR_PPCE5500:
4618 rs6000_cost = &ppce5500_cost;
4621 case PROCESSOR_PPCE6500:
4622 rs6000_cost = &ppce6500_cost;
4625 case PROCESSOR_TITAN:
4626 rs6000_cost = &titan_cost;
4629 case PROCESSOR_POWER4:
4630 case PROCESSOR_POWER5:
4631 rs6000_cost = &power4_cost;
4634 case PROCESSOR_POWER6:
4635 rs6000_cost = &power6_cost;
4638 case PROCESSOR_POWER7:
4639 rs6000_cost = &power7_cost;
4642 case PROCESSOR_POWER8:
4643 rs6000_cost = &power8_cost;
4646 case PROCESSOR_POWER9:
4647 case PROCESSOR_FUTURE:
4648 rs6000_cost = &power9_cost;
4651 case PROCESSOR_PPCA2:
4652 rs6000_cost = &ppca2_cost;
4661 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4662 rs6000_cost->simultaneous_prefetches,
4663 global_options.x_param_values,
4664 global_options_set.x_param_values);
4665 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4666 global_options.x_param_values,
4667 global_options_set.x_param_values);
4668 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4669 rs6000_cost->cache_line_size,
4670 global_options.x_param_values,
4671 global_options_set.x_param_values);
4672 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4673 global_options.x_param_values,
4674 global_options_set.x_param_values);
4676 /* Increase loop peeling limits based on performance analysis. */
4677 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4678 global_options.x_param_values,
4679 global_options_set.x_param_values);
4680 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4681 global_options.x_param_values,
4682 global_options_set.x_param_values);
4684 /* Use the 'model' -fsched-pressure algorithm by default. */
4685 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
4686 SCHED_PRESSURE_MODEL,
4687 global_options.x_param_values,
4688 global_options_set.x_param_values);
4690 /* If using typedef char *va_list, signal that
4691 __builtin_va_start (&ap, 0) can be optimized to
4692 ap = __builtin_next_arg (0). */
4693 if (DEFAULT_ABI != ABI_V4)
4694 targetm.expand_builtin_va_start = NULL;
4697 /* If not explicitly specified via option, decide whether to generate indexed
4698 load/store instructions. A value of -1 indicates that the
4699 initial value of this variable has not been overwritten. During
4700 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4701 if (TARGET_AVOID_XFORM == -1)
4702 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4703 DERAT mispredict penalty. However, the LVE and STVE altivec instructions
4704 need indexed accesses and the type used is the scalar type of the element
4705 being loaded or stored. */
4706 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4707 && !TARGET_ALTIVEC);
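/* (Background note, illustrative: avoiding X-form means preferring D-form
   accesses such as "lwz r3,8(r4)" over indexed accesses such as
   "lwzx r3,r4,r5" when there is a choice.)  */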
4709 /* Set the -mrecip options. */
4710 if (rs6000_recip_name)
4712 char *p = ASTRDUP (rs6000_recip_name);
4714 unsigned int mask, i;
4717 while ((q = strtok (p, ",")) != NULL)
4728 if (!strcmp (q, "default"))
4729 mask = ((TARGET_RECIP_PRECISION)
4730 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4733 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4734 if (!strcmp (q, recip_options[i].string))
4736 mask = recip_options[i].mask;
4740 if (i == ARRAY_SIZE (recip_options))
4742 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4750 rs6000_recip_control &= ~mask;
4752 rs6000_recip_control |= mask;
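/* Usage sketch (illustrative): -mrecip=rsqrtd enables only the double
   precision reciprocal square root estimate, while "default" picks the
   high- or low-precision set as above.  A '!' prefix disables a token,
   e.g. -mrecip=all,!rsqrtd enables everything except the double precision
   reciprocal square root estimates.  */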
4756 /* Set the builtin mask from the various options that could affect which
4757 builtins are used. In the past we used target_flags, but we've run out
4758 of bits, and some options are no longer in target_flags. */
4759 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4760 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4761 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4762 rs6000_builtin_mask);
4764 /* Initialize all of the registers. */
4765 rs6000_init_hard_regno_mode_ok (global_init_p);
4767 /* Save the initial options in case the user uses function-specific options. */
4769 target_option_default_node = target_option_current_node
4770 = build_target_option_node (&global_options);
4772 /* If not explicitly specified via option, decide whether to generate the
4773 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4774 if (TARGET_LINK_STACK == -1)
4775 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4777 /* Deprecate use of -mno-speculate-indirect-jumps. */
4778 if (!rs6000_speculate_indirect_jumps)
4779 warning (0, "%qs is deprecated and not recommended in any circumstances",
4780 "-mno-speculate-indirect-jumps");
4785 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4786 define the target cpu type. */
4789 rs6000_option_override (void)
4791 (void) rs6000_option_override_internal (true);
4795 /* Implement targetm.vectorize.builtin_mask_for_load. */
4797 rs6000_builtin_mask_for_load (void)
4799 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4800 if ((TARGET_ALTIVEC && !TARGET_VSX)
4801 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4802 return altivec_builtin_mask_for_load;
4807 /* Implement LOOP_ALIGN. */
4809 rs6000_loop_align (rtx label)
4814 /* Don't override loop alignment if -falign-loops was specified. */
4815 if (!can_override_loop_align)
4818 bb = BLOCK_FOR_INSN (label);
4819 ninsns = num_loop_insns (bb->loop_father);
4821 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4822 if (ninsns > 4 && ninsns <= 8
4823 && (rs6000_tune == PROCESSOR_POWER4
4824 || rs6000_tune == PROCESSOR_POWER5
4825 || rs6000_tune == PROCESSOR_POWER6
4826 || rs6000_tune == PROCESSOR_POWER7
4827 || rs6000_tune == PROCESSOR_POWER8))
4828 return align_flags (5);
4833 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4834 after applying N iterations. This routine does not determine
4835 how many iterations are required to reach the desired alignment. */
4838 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4845 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4848 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4858 /* Assuming that all other types are naturally aligned. CHECKME! */
4863 /* Return true if the vector misalignment factor is supported by the
4866 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4873 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4876 /* Return false if the movmisalign pattern is not supported for this mode. */
4877 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4880 if (misalignment == -1)
4882 /* Misalignment factor is unknown at compile time but we know
4883 it's word aligned. */
4884 if (rs6000_vector_alignment_reachable (type, is_packed))
4886 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4888 if (element_size == 64 || element_size == 32)
4895 /* VSX supports word-aligned vectors. */
4896 if (misalignment % 4 == 0)
4902 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4904 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4905 tree vectype, int misalign)
4910 switch (type_of_cost)
4920 case cond_branch_not_taken:
4929 case vec_promote_demote:
4935 case cond_branch_taken:
4938 case unaligned_load:
4939 case vector_gather_load:
4940 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4943 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4945 elements = TYPE_VECTOR_SUBPARTS (vectype);
4947 /* Double word aligned. */
4955 /* Double word aligned. */
4959 /* Unknown misalignment. */
4972 /* Misaligned loads are not supported. */
4977 case unaligned_store:
4978 case vector_scatter_store:
4979 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4982 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4984 elements = TYPE_VECTOR_SUBPARTS (vectype);
4986 /* Double word aligned. */
4994 /* Double word aligned. */
4998 /* Unknown misalignment. */
5011 /* Misaligned stores are not supported. */
5017 /* This is a rough approximation assuming non-constant elements
5018 constructed into a vector via element insertion. FIXME:
5019 vec_construct is not granular enough for uniformly good
5020 decisions. If the initialization is a splat, this is
5021 cheaper than we estimate. Improve this someday. */
5022 elem_type = TREE_TYPE (vectype);
5023 /* 32-bit vectors loaded into registers are stored as double
5024 precision, so we need 2 permutes, 2 converts, and 1 merge
5025 to construct a vector of short floats from them. */
5026 if (SCALAR_FLOAT_TYPE_P (elem_type)
5027 && TYPE_PRECISION (elem_type) == 32)
5029 /* On POWER9, integer vector types are built up in GPRs and then
5030 use a direct move (2 cycles). For POWER8 this is even worse,
5031 as we need two direct moves and a merge, and the direct moves
5033 else if (INTEGRAL_TYPE_P (elem_type))
5035 if (TARGET_P9_VECTOR)
5036 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5038 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5041 /* V2DFmode doesn't need a direct move. */
5049 /* Implement targetm.vectorize.preferred_simd_mode. */
5052 rs6000_preferred_simd_mode (scalar_mode mode)
5061 if (TARGET_ALTIVEC || TARGET_VSX)
5081 typedef struct _rs6000_cost_data
5083 struct loop *loop_info;
5087 /* Test for likely overcommitment of vector hardware resources. If a
5088 loop iteration is relatively large, and too large a percentage of
5089 instructions in the loop are vectorized, the cost model may not
5090 adequately reflect delays from unavailable vector resources.
5091 Penalize the loop body cost for this case. */
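/* Worked example (illustrative): with vec_cost = 90 and not_vec_cost = 10,
   density_pct = 9000 / 100 = 90 exceeds 85 and the combined cost 100
   exceeds 70, so the vect_body cost is scaled by (100 + 10) / 100,
   i.e. 90 becomes 99.  */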
5094 rs6000_density_test (rs6000_cost_data *data)
5096 const int DENSITY_PCT_THRESHOLD = 85;
5097 const int DENSITY_SIZE_THRESHOLD = 70;
5098 const int DENSITY_PENALTY = 10;
5099 struct loop *loop = data->loop_info;
5100 basic_block *bbs = get_loop_body (loop);
5101 int nbbs = loop->num_nodes;
5102 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
5103 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5106 for (i = 0; i < nbbs; i++)
5108 basic_block bb = bbs[i];
5109 gimple_stmt_iterator gsi;
5111 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5113 gimple *stmt = gsi_stmt (gsi);
5114 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5116 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5117 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5123 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5125 if (density_pct > DENSITY_PCT_THRESHOLD
5126 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5128 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5129 if (dump_enabled_p ())
5130 dump_printf_loc (MSG_NOTE, vect_location,
5131 "density %d%%, cost %d exceeds threshold, penalizing "
5132 "loop body cost by %d%%", density_pct,
5133 vec_cost + not_vec_cost, DENSITY_PENALTY);
5137 /* Implement targetm.vectorize.init_cost. */
5139 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5140 instruction is needed by the vectorization. */
5141 static bool rs6000_vect_nonmem;
5144 rs6000_init_cost (struct loop *loop_info)
5146 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5147 data->loop_info = loop_info;
5148 data->cost[vect_prologue] = 0;
5149 data->cost[vect_body] = 0;
5150 data->cost[vect_epilogue] = 0;
5151 rs6000_vect_nonmem = false;
5155 /* Implement targetm.vectorize.add_stmt_cost. */
5158 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5159 struct _stmt_vec_info *stmt_info, int misalign,
5160 enum vect_cost_model_location where)
5162 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5163 unsigned retval = 0;
5165 if (flag_vect_cost_model)
5167 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5168 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5170 /* Statements in an inner loop relative to the loop being
5171 vectorized are weighted more heavily. The value here is
5172 arbitrary and could potentially be improved with analysis. */
5173 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5174 count *= 50; /* FIXME. */
5176 retval = (unsigned) (count * stmt_cost);
5177 cost_data->cost[where] += retval;
5179 /* Check whether we're doing something other than just a copy loop.
5180 Not all such loops may be profitably vectorized; see
5181 rs6000_finish_cost. */
5182 if ((kind == vec_to_scalar || kind == vec_perm
5183 || kind == vec_promote_demote || kind == vec_construct
5184 || kind == scalar_to_vec)
5185 || (where == vect_body && kind == vector_stmt))
5186 rs6000_vect_nonmem = true;
5192 /* Implement targetm.vectorize.finish_cost. */
5195 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5196 unsigned *body_cost, unsigned *epilogue_cost)
5198 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5200 if (cost_data->loop_info)
5201 rs6000_density_test (cost_data);
5203 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5204 that require versioning for any reason. The vectorization is at
5205 best a wash inside the loop, and the versioning checks make
5206 profitability highly unlikely and potentially quite harmful. */
5207 if (cost_data->loop_info)
5209 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5210 if (!rs6000_vect_nonmem
5211 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5212 && LOOP_REQUIRES_VERSIONING (vec_info))
5213 cost_data->cost[vect_body] += 10000;
5216 *prologue_cost = cost_data->cost[vect_prologue];
5217 *body_cost = cost_data->cost[vect_body];
5218 *epilogue_cost = cost_data->cost[vect_epilogue];
5221 /* Implement targetm.vectorize.destroy_cost_data. */
5224 rs6000_destroy_cost_data (void *data)
5229 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5230 library with vectorized intrinsics. */
5233 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5237 const char *suffix = NULL;
5238 tree fntype, new_fndecl, bdecl = NULL_TREE;
5241 machine_mode el_mode, in_mode;
5244 /* Libmass is suitable for unsafe math only as it does not correctly support
5245 parts of IEEE with the required precision such as denormals. Only support
5246 it if we have VSX to use the simd d2 or f4 functions.
5247 XXX: Add variable length support. */
5248 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5251 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5252 n = TYPE_VECTOR_SUBPARTS (type_out);
5253 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5254 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5255 if (el_mode != in_mode
5291 if (el_mode == DFmode && n == 2)
5293 bdecl = mathfn_built_in (double_type_node, fn);
5294 suffix = "d2"; /* pow -> powd2 */
5296 else if (el_mode == SFmode && n == 4)
5298 bdecl = mathfn_built_in (float_type_node, fn);
5299 suffix = "4"; /* powf -> powf4 */
5311 gcc_assert (suffix != NULL);
5312 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5316 strcpy (name, bname + sizeof ("__builtin_") - 1);
5317 strcat (name, suffix);
5320 fntype = build_function_type_list (type_out, type_in, NULL);
5321 else if (n_args == 2)
5322 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5326 /* Build a function declaration for the vectorized function. */
5327 new_fndecl = build_decl (BUILTINS_LOCATION,
5328 FUNCTION_DECL, get_identifier (name), fntype);
5329 TREE_PUBLIC (new_fndecl) = 1;
5330 DECL_EXTERNAL (new_fndecl) = 1;
5331 DECL_IS_NOVOPS (new_fndecl) = 1;
5332 TREE_READONLY (new_fndecl) = 1;
5337 /* Returns a function decl for a vectorized version of the builtin function
5338 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5339 if it is not available. */
5342 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5345 machine_mode in_mode, out_mode;
5348 if (TARGET_DEBUG_BUILTIN)
5349 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5350 combined_fn_name (combined_fn (fn)),
5351 GET_MODE_NAME (TYPE_MODE (type_out)),
5352 GET_MODE_NAME (TYPE_MODE (type_in)));
5354 if (TREE_CODE (type_out) != VECTOR_TYPE
5355 || TREE_CODE (type_in) != VECTOR_TYPE)
5358 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5359 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5360 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5361 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5366 if (VECTOR_UNIT_VSX_P (V2DFmode)
5367 && out_mode == DFmode && out_n == 2
5368 && in_mode == DFmode && in_n == 2)
5369 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5370 if (VECTOR_UNIT_VSX_P (V4SFmode)
5371 && out_mode == SFmode && out_n == 4
5372 && in_mode == SFmode && in_n == 4)
5373 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5374 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5375 && out_mode == SFmode && out_n == 4
5376 && in_mode == SFmode && in_n == 4)
5377 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5380 if (VECTOR_UNIT_VSX_P (V2DFmode)
5381 && out_mode == DFmode && out_n == 2
5382 && in_mode == DFmode && in_n == 2)
5383 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5384 if (VECTOR_UNIT_VSX_P (V4SFmode)
5385 && out_mode == SFmode && out_n == 4
5386 && in_mode == SFmode && in_n == 4)
5387 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5388 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5389 && out_mode == SFmode && out_n == 4
5390 && in_mode == SFmode && in_n == 4)
5391 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5394 if (VECTOR_UNIT_VSX_P (V2DFmode)
5395 && out_mode == DFmode && out_n == 2
5396 && in_mode == DFmode && in_n == 2)
5397 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5398 if (VECTOR_UNIT_VSX_P (V4SFmode)
5399 && out_mode == SFmode && out_n == 4
5400 && in_mode == SFmode && in_n == 4)
5401 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5402 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5403 && out_mode == SFmode && out_n == 4
5404 && in_mode == SFmode && in_n == 4)
5405 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5408 if (VECTOR_UNIT_VSX_P (V2DFmode)
5409 && out_mode == DFmode && out_n == 2
5410 && in_mode == DFmode && in_n == 2)
5411 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5412 if (VECTOR_UNIT_VSX_P (V4SFmode)
5413 && out_mode == SFmode && out_n == 4
5414 && in_mode == SFmode && in_n == 4)
5415 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5416 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5417 && out_mode == SFmode && out_n == 4
5418 && in_mode == SFmode && in_n == 4)
5419 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5422 if (VECTOR_UNIT_VSX_P (V2DFmode)
5423 && out_mode == DFmode && out_n == 2
5424 && in_mode == DFmode && in_n == 2)
5425 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5426 if (VECTOR_UNIT_VSX_P (V4SFmode)
5427 && out_mode == SFmode && out_n == 4
5428 && in_mode == SFmode && in_n == 4)
5429 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5430 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5431 && out_mode == SFmode && out_n == 4
5432 && in_mode == SFmode && in_n == 4)
5433 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5436 if (VECTOR_UNIT_VSX_P (V2DFmode)
5437 && flag_unsafe_math_optimizations
5438 && out_mode == DFmode && out_n == 2
5439 && in_mode == DFmode && in_n == 2)
5440 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5441 if (VECTOR_UNIT_VSX_P (V4SFmode)
5442 && flag_unsafe_math_optimizations
5443 && out_mode == SFmode && out_n == 4
5444 && in_mode == SFmode && in_n == 4)
5445 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5448 if (VECTOR_UNIT_VSX_P (V2DFmode)
5449 && !flag_trapping_math
5450 && out_mode == DFmode && out_n == 2
5451 && in_mode == DFmode && in_n == 2)
5452 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5453 if (VECTOR_UNIT_VSX_P (V4SFmode)
5454 && !flag_trapping_math
5455 && out_mode == SFmode && out_n == 4
5456 && in_mode == SFmode && in_n == 4)
5457 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5463 /* Generate calls to libmass if appropriate. */
5464 if (rs6000_veclib_handler)
5465 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5470 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5473 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5476 machine_mode in_mode, out_mode;
5479 if (TARGET_DEBUG_BUILTIN)
5480 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5481 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5482 GET_MODE_NAME (TYPE_MODE (type_out)),
5483 GET_MODE_NAME (TYPE_MODE (type_in)));
5485 if (TREE_CODE (type_out) != VECTOR_TYPE
5486 || TREE_CODE (type_in) != VECTOR_TYPE)
5489 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5490 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5491 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5492 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5494 enum rs6000_builtins fn
5495 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5498 case RS6000_BUILTIN_RSQRTF:
5499 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5500 && out_mode == SFmode && out_n == 4
5501 && in_mode == SFmode && in_n == 4)
5502 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5504 case RS6000_BUILTIN_RSQRT:
5505 if (VECTOR_UNIT_VSX_P (V2DFmode)
5506 && out_mode == DFmode && out_n == 2
5507 && in_mode == DFmode && in_n == 2)
5508 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5510 case RS6000_BUILTIN_RECIPF:
5511 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5512 && out_mode == SFmode && out_n == 4
5513 && in_mode == SFmode && in_n == 4)
5514 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5516 case RS6000_BUILTIN_RECIP:
5517 if (VECTOR_UNIT_VSX_P (V2DFmode)
5518 && out_mode == DFmode && out_n == 2
5519 && in_mode == DFmode && in_n == 2)
5520 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5528 /* Default CPU string for rs6000*_file_start functions. */
5529 static const char *rs6000_default_cpu;
5531 #ifdef USING_ELFOS_H
5532 const char *rs6000_machine;
5535 rs6000_machine_from_flags (void)
5537 if ((rs6000_isa_flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER))
5540 if ((rs6000_isa_flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5542 if ((rs6000_isa_flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5544 if ((rs6000_isa_flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5546 if ((rs6000_isa_flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5548 if ((rs6000_isa_flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5550 if ((rs6000_isa_flags & ISA_2_1_MASKS) != 0)
5552 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5558 emit_asm_machine (void)
5560 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
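/* For example (illustrative), when -mcpu=power9 is in effect this prints
   "\t.machine power9", letting the assembler accept ISA 3.0 mnemonics.  */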
5564 /* Do anything needed at the start of the asm file. */
5567 rs6000_file_start (void)
5570 const char *start = buffer;
5571 FILE *file = asm_out_file;
5573 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5575 default_file_start ();
5577 if (flag_verbose_asm)
5579 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5581 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5583 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5587 if (global_options_set.x_rs6000_cpu_index)
5589 fprintf (file, "%s -mcpu=%s", start,
5590 processor_target_table[rs6000_cpu_index].name);
5594 if (global_options_set.x_rs6000_tune_index)
5596 fprintf (file, "%s -mtune=%s", start,
5597 processor_target_table[rs6000_tune_index].name);
5601 if (PPC405_ERRATUM77)
5603 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5607 #ifdef USING_ELFOS_H
5608 switch (rs6000_sdata)
5610 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5611 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5612 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5613 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5616 if (rs6000_sdata && g_switch_value)
5618 fprintf (file, "%s -G %d", start,
5628 #ifdef USING_ELFOS_H
5629 rs6000_machine = rs6000_machine_from_flags ();
5630 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
5631 && !global_options_set.x_rs6000_cpu_index)
5632 emit_asm_machine ();
5635 if (DEFAULT_ABI == ABI_ELFv2)
5636 fprintf (file, "\t.abiversion 2\n");
5640 /* Return nonzero if this function is known to have a null epilogue. */
5643 direct_return (void)
5645 if (reload_completed)
5647 rs6000_stack_t *info = rs6000_stack_info ();
5649 if (info->first_gp_reg_save == 32
5650 && info->first_fp_reg_save == 64
5651 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5652 && ! info->lr_save_p
5653 && ! info->cr_save_p
5654 && info->vrsave_size == 0
5662 /* Helper for num_insns_constant. Calculate the number of instructions to
5663 load VALUE to a single gpr using combinations of addi, addis, ori,
5664 oris and sldi instructions. */
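/* Worked examples (illustrative; the counts follow the logic below, while
   the actual instruction sequence is chosen by later splitters):
     0x7fff             -> 1 insn  (li)
     0x12340000         -> 1 insn  (lis)
     0x12345678         -> 2 insns (lis; ori)
     0x1234567800000000 -> 3 insns (lis; ori; sldi 32) on 64-bit.  */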
5667 num_insns_constant_gpr (HOST_WIDE_INT value)
5669 /* signed constant loadable with addi */
5670 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5673 /* constant loadable with addis */
5674 else if ((value & 0xffff) == 0
5675 && (value >> 31 == -1 || value >> 31 == 0))
5678 else if (TARGET_POWERPC64)
5680 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5681 HOST_WIDE_INT high = value >> 31;
5683 if (high == 0 || high == -1)
5689 return num_insns_constant_gpr (high) + 1;
5691 return num_insns_constant_gpr (low) + 1;
5693 return (num_insns_constant_gpr (high)
5694 + num_insns_constant_gpr (low) + 1);
5701 /* Helper for num_insns_constant. Allow constants formed by the
5702 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5703 and handle modes that require multiple gprs. */
5706 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5708 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5712 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5713 int insns = num_insns_constant_gpr (low);
5715 /* We won't get more than 2 from num_insns_constant_gpr
5716 except when TARGET_POWERPC64 and mode is DImode or
5717 wider, so the register mode must be DImode. */
5718 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5721 value >>= BITS_PER_WORD;
/* Return the number of instructions it takes to form a constant in as
   many gprs as are needed for MODE. */
5730 num_insns_constant (rtx op, machine_mode mode)
5734 switch (GET_CODE (op))
5740 case CONST_WIDE_INT:
5743 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5744 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5751 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5753 if (mode == SFmode || mode == SDmode)
5758 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5760 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5761 /* See the first define_split in rs6000.md handling a
5762 const_double_operand. */
5766 else if (mode == DFmode || mode == DDmode)
5771 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5773 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5775 /* See the second (32-bit) and third (64-bit) define_split
5776 in rs6000.md handling a const_double_operand. */
5777 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5778 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5781 else if (mode == TFmode || mode == TDmode
5782 || mode == KFmode || mode == IFmode)
5788 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5790 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5792 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5793 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5794 insns = num_insns_constant_multi (val, DImode);
5795 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5796 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5797 insns += num_insns_constant_multi (val, DImode);
5809 return num_insns_constant_multi (val, mode);
5812 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5813 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5814 corresponding element of the vector, but for V4SFmode, the
5815 corresponding "float" is interpreted as an SImode integer. */
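/* For example, a V4SFmode element holding 1.0f is returned as the
   SImode value 0x3f800000.  */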
5818 const_vector_elt_as_int (rtx op, unsigned int elt)
5822 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5823 gcc_assert (GET_MODE (op) != V2DImode
5824 && GET_MODE (op) != V2DFmode);
5826 tmp = CONST_VECTOR_ELT (op, elt);
5827 if (GET_MODE (op) == V4SFmode)
5828 tmp = gen_lowpart (SImode, tmp);
5829 return INTVAL (tmp);
5832 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5833 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5834 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5835 all items are set to the same value and contain COPIES replicas of the
vsplt's operand; if STEP > 1, one in every STEP elements is set to the vsplt's
5837 operand and the others are set to the value of the operand's msb. */
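/* Two illustrative cases: a V8HImode constant with every element 0x0303
   matches "vspltisb 3" with COPIES == 2 (each halfword holds two copies
   of the splatted byte), while the V8HImode constant { 0, 5, 0, 5, ... }
   (big-endian element order) matches "vspltisw 5" with STEP == 2, the
   other halfwords holding the operand's msb extension (here zero).  */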
5840 vspltis_constant (rtx op, unsigned step, unsigned copies)
5842 machine_mode mode = GET_MODE (op);
5843 machine_mode inner = GET_MODE_INNER (mode);
5851 HOST_WIDE_INT splat_val;
5852 HOST_WIDE_INT msb_val;
5854 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5857 nunits = GET_MODE_NUNITS (mode);
5858 bitsize = GET_MODE_BITSIZE (inner);
5859 mask = GET_MODE_MASK (inner);
5861 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5863 msb_val = val >= 0 ? 0 : -1;
5865 /* Construct the value to be splatted, if possible. If not, return 0. */
5866 for (i = 2; i <= copies; i *= 2)
5868 HOST_WIDE_INT small_val;
5870 small_val = splat_val >> bitsize;
5872 if (splat_val != ((HOST_WIDE_INT)
5873 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5874 | (small_val & mask)))
5876 splat_val = small_val;
5879 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5880 if (EASY_VECTOR_15 (splat_val))
5883 /* Also check if we can splat, and then add the result to itself. Do so if
the value is positive, or if the splat instruction is using OP's mode;
5885 for splat_val < 0, the splat and the add should use the same mode. */
5886 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5887 && (splat_val >= 0 || (step == 1 && copies == 1)))
/* Also check if we are loading up the most significant bit, which can be
   done by loading up -1 and shifting the value left by -1. */
5892 else if (EASY_VECTOR_MSB (splat_val, inner))
5898 /* Check if VAL is present in every STEP-th element, and the
5899 other elements are filled with its most significant bit. */
5900 for (i = 1; i < nunits; ++i)
5902 HOST_WIDE_INT desired_val;
5903 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5904 if ((i & (step - 1)) == 0)
5907 desired_val = msb_val;
5909 if (desired_val != const_vector_elt_as_int (op, elt))
/* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5917 instruction, filling in the bottom elements with 0 or -1.
5919 Return 0 if the constant cannot be generated with VSLDOI. Return positive
for the number of zeroes to shift in, or negative for the number of 0xff
   bytes to shift in.
5923 OP is a CONST_VECTOR. */
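/* For example (big endian), the V4SImode vector { 5, 5, 5, 0 } could be
   built as "vspltisw 5" followed by a VSLDOI shifting in 4 zero bytes,
   so the return value would be 4.  */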
5926 vspltis_shifted (rtx op)
5928 machine_mode mode = GET_MODE (op);
5929 machine_mode inner = GET_MODE_INNER (mode);
5937 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5940 /* We need to create pseudo registers to do the shift, so don't recognize
5941 shift vector constants after reload. */
5942 if (!can_create_pseudo_p ())
5945 nunits = GET_MODE_NUNITS (mode);
5946 mask = GET_MODE_MASK (inner);
5948 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5950 /* Check if the value can really be the operand of a vspltis[bhw]. */
5951 if (EASY_VECTOR_15 (val))
5954 /* Also check if we are loading up the most significant bit which can be done
5955 by loading up -1 and shifting the value left by -1. */
5956 else if (EASY_VECTOR_MSB (val, inner))
5962 /* Check if VAL is present in every STEP-th element until we find elements
5963 that are 0 or all 1 bits. */
5964 for (i = 1; i < nunits; ++i)
5966 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5967 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5969 /* If the value isn't the splat value, check for the remaining elements
5975 for (j = i+1; j < nunits; ++j)
5977 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5978 if (const_vector_elt_as_int (op, elt2) != 0)
5982 return (nunits - i) * GET_MODE_SIZE (inner);
5985 else if ((elt_val & mask) == mask)
5987 for (j = i+1; j < nunits; ++j)
5989 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5990 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5994 return -((nunits - i) * GET_MODE_SIZE (inner));
/* If all elements are equal, we don't need to do VSLDOI. */
6007 /* Return true if OP is of the given MODE and can be synthesized
6008 with a vspltisb, vspltish or vspltisw. */
6011 easy_altivec_constant (rtx op, machine_mode mode)
6013 unsigned step, copies;
6015 if (mode == VOIDmode)
6016 mode = GET_MODE (op);
6017 else if (mode != GET_MODE (op))
/* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
   constants. */
6022 if (mode == V2DFmode)
6023 return zero_constant (op, mode);
6025 else if (mode == V2DImode)
6027 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6028 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6031 if (zero_constant (op, mode))
6034 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6035 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6041 /* V1TImode is a special container for TImode. Ignore for now. */
6042 else if (mode == V1TImode)
6045 /* Start with a vspltisw. */
6046 step = GET_MODE_NUNITS (mode) / 4;
6049 if (vspltis_constant (op, step, copies))
6052 /* Then try with a vspltish. */
6058 if (vspltis_constant (op, step, copies))
6061 /* And finally a vspltisb. */
6067 if (vspltis_constant (op, step, copies))
6070 if (vspltis_shifted (op) != 0)
6076 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6077 result is OP. Abort if it is not possible. */
6080 gen_easy_altivec_constant (rtx op)
6082 machine_mode mode = GET_MODE (op);
6083 int nunits = GET_MODE_NUNITS (mode);
6084 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6085 unsigned step = nunits / 4;
6086 unsigned copies = 1;
6088 /* Start with a vspltisw. */
6089 if (vspltis_constant (op, step, copies))
6090 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6092 /* Then try with a vspltish. */
6098 if (vspltis_constant (op, step, copies))
6099 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6101 /* And finally a vspltisb. */
6107 if (vspltis_constant (op, step, copies))
6108 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6113 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6114 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
Return the number of instructions needed (1 or 2) in the location pointed
   to by NUM_INSNS_PTR.
6119 Return the constant that is being split via CONSTANT_PTR. */
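/* Two illustrative cases: a V16QImode vector with all elements 11 is a
   single "xxspltib 11" (1 insn), while a V8HImode vector with all
   elements -5 takes "xxspltib 251" plus a vupkhsb sign extension
   (2 insns).  */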
6122 xxspltib_constant_p (rtx op,
6127 size_t nunits = GET_MODE_NUNITS (mode);
6129 HOST_WIDE_INT value;
6132 /* Set the returned values to out of bound values. */
6133 *num_insns_ptr = -1;
6134 *constant_ptr = 256;
6136 if (!TARGET_P9_VECTOR)
6139 if (mode == VOIDmode)
6140 mode = GET_MODE (op);
6142 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6145 /* Handle (vec_duplicate <constant>). */
6146 if (GET_CODE (op) == VEC_DUPLICATE)
6148 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6149 && mode != V2DImode)
6152 element = XEXP (op, 0);
6153 if (!CONST_INT_P (element))
6156 value = INTVAL (element);
6157 if (!IN_RANGE (value, -128, 127))
6161 /* Handle (const_vector [...]). */
6162 else if (GET_CODE (op) == CONST_VECTOR)
6164 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6165 && mode != V2DImode)
6168 element = CONST_VECTOR_ELT (op, 0);
6169 if (!CONST_INT_P (element))
6172 value = INTVAL (element);
6173 if (!IN_RANGE (value, -128, 127))
6176 for (i = 1; i < nunits; i++)
6178 element = CONST_VECTOR_ELT (op, i);
6179 if (!CONST_INT_P (element))
6182 if (value != INTVAL (element))
6187 /* Handle integer constants being loaded into the upper part of the VSX
6188 register as a scalar. If the value isn't 0/-1, only allow it if the mode
can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6190 else if (CONST_INT_P (op))
6192 if (!SCALAR_INT_MODE_P (mode))
6195 value = INTVAL (op);
6196 if (!IN_RANGE (value, -128, 127))
6199 if (!IN_RANGE (value, -1, 0))
6201 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6204 if (EASY_VECTOR_15 (value))
6212 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6213 sign extend. Special case 0/-1 to allow getting any VSX register instead
6214 of an Altivec register. */
6215 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6216 && EASY_VECTOR_15 (value))
6219 /* Return # of instructions and the constant byte for XXSPLTIB. */
6220 if (mode == V16QImode)
6223 else if (IN_RANGE (value, -1, 0))
6229 *constant_ptr = (int) value;
6234 output_vec_const_move (rtx *operands)
6242 mode = GET_MODE (dest);
6246 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6247 int xxspltib_value = 256;
6250 if (zero_constant (vec, mode))
6252 if (TARGET_P9_VECTOR)
6253 return "xxspltib %x0,0";
6255 else if (dest_vmx_p)
6256 return "vspltisw %0,0";
6259 return "xxlxor %x0,%x0,%x0";
6262 if (all_ones_constant (vec, mode))
6264 if (TARGET_P9_VECTOR)
6265 return "xxspltib %x0,255";
6267 else if (dest_vmx_p)
6268 return "vspltisw %0,-1";
6270 else if (TARGET_P8_VECTOR)
6271 return "xxlorc %x0,%x0,%x0";
6277 if (TARGET_P9_VECTOR
6278 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6282 operands[2] = GEN_INT (xxspltib_value & 0xff);
6283 return "xxspltib %x0,%2";
6294 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6295 if (zero_constant (vec, mode))
6296 return "vspltisw %0,0";
6298 if (all_ones_constant (vec, mode))
6299 return "vspltisw %0,-1";
6301 /* Do we need to construct a value using VSLDOI? */
6302 shift = vspltis_shifted (vec);
6306 splat_vec = gen_easy_altivec_constant (vec);
6307 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6308 operands[1] = XEXP (splat_vec, 0);
6309 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6312 switch (GET_MODE (splat_vec))
6315 return "vspltisw %0,%1";
6318 return "vspltish %0,%1";
6321 return "vspltisb %0,%1";
6331 /* Initialize vector TARGET to VALS. */
6334 rs6000_expand_vector_init (rtx target, rtx vals)
6336 machine_mode mode = GET_MODE (target);
6337 machine_mode inner_mode = GET_MODE_INNER (mode);
6338 int n_elts = GET_MODE_NUNITS (mode);
6339 int n_var = 0, one_var = -1;
6340 bool all_same = true, all_const_zero = true;
6344 for (i = 0; i < n_elts; ++i)
6346 x = XVECEXP (vals, 0, i);
6347 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6348 ++n_var, one_var = i;
6349 else if (x != CONST0_RTX (inner_mode))
6350 all_const_zero = false;
6352 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6358 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6359 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6360 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6362 /* Zero register. */
6363 emit_move_insn (target, CONST0_RTX (mode));
6366 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6368 /* Splat immediate. */
6369 emit_insn (gen_rtx_SET (target, const_vec));
6374 /* Load from constant pool. */
6375 emit_move_insn (target, const_vec);
6380 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6381 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6385 size_t num_elements = all_same ? 1 : 2;
6386 for (i = 0; i < num_elements; i++)
6388 op[i] = XVECEXP (vals, 0, i);
6389 /* Just in case there is a SUBREG with a smaller mode, do a
6391 if (GET_MODE (op[i]) != inner_mode)
6393 rtx tmp = gen_reg_rtx (inner_mode);
6394 convert_move (tmp, op[i], 0);
6397 /* Allow load with splat double word. */
6398 else if (MEM_P (op[i]))
6401 op[i] = force_reg (inner_mode, op[i]);
6403 else if (!REG_P (op[i]))
6404 op[i] = force_reg (inner_mode, op[i]);
6409 if (mode == V2DFmode)
6410 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6412 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6416 if (mode == V2DFmode)
6417 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6419 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6424 /* Special case initializing vector int if we are on 64-bit systems with
6425 direct move or we have the ISA 3.0 instructions. */
6426 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6427 && TARGET_DIRECT_MOVE_64BIT)
6431 rtx element0 = XVECEXP (vals, 0, 0);
6432 if (MEM_P (element0))
6433 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6435 element0 = force_reg (SImode, element0);
6437 if (TARGET_P9_VECTOR)
6438 emit_insn (gen_vsx_splat_v4si (target, element0));
6441 rtx tmp = gen_reg_rtx (DImode);
6442 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6443 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6452 for (i = 0; i < 4; i++)
6453 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6455 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6456 elements[2], elements[3]));
/* With single precision floating point on VSX, internally single precision
   is actually represented as a double. Either make two V2DF vectors and
   convert those vectors to single precision, or do one conversion and splat
   the result to the other elements. */
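/* Sketch of the power8 big-endian case for { a, b, c, d }: dbl_even
   becomes the V2DF vector { a, c } and dbl_odd becomes { b, d }; after
   xvcvdpsp converts each pair to single precision, vmrgew interleaves
   the even words of both vectors back into { a, b, c, d }.  */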
6465 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6469 rtx element0 = XVECEXP (vals, 0, 0);
6471 if (TARGET_P9_VECTOR)
6473 if (MEM_P (element0))
6474 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6476 emit_insn (gen_vsx_splat_v4sf (target, element0));
6481 rtx freg = gen_reg_rtx (V4SFmode);
6482 rtx sreg = force_reg (SFmode, element0);
6483 rtx cvt = (TARGET_XSCVDPSPN
6484 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6485 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6488 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6494 rtx dbl_even = gen_reg_rtx (V2DFmode);
6495 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6496 rtx flt_even = gen_reg_rtx (V4SFmode);
6497 rtx flt_odd = gen_reg_rtx (V4SFmode);
6498 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6499 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6500 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6501 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6503 /* Use VMRGEW if we can instead of doing a permute. */
6504 if (TARGET_P8_VECTOR)
6506 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6507 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6508 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6509 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6510 if (BYTES_BIG_ENDIAN)
6511 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6513 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6517 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6518 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6519 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6520 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6521 rs6000_expand_extract_even (target, flt_even, flt_odd);
6527 /* Special case initializing vector short/char that are splats if we are on
6528 64-bit systems with direct move. */
6529 if (all_same && TARGET_DIRECT_MOVE_64BIT
6530 && (mode == V16QImode || mode == V8HImode))
6532 rtx op0 = XVECEXP (vals, 0, 0);
6533 rtx di_tmp = gen_reg_rtx (DImode);
6536 op0 = force_reg (GET_MODE_INNER (mode), op0);
6538 if (mode == V16QImode)
6540 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6541 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6545 if (mode == V8HImode)
6547 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6548 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6553 /* Store value to stack temp. Load vector element. Splat. However, splat
6554 of 64-bit items is not supported on Altivec. */
6555 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6557 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6558 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6559 XVECEXP (vals, 0, 0));
6560 x = gen_rtx_UNSPEC (VOIDmode,
6561 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6562 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6564 gen_rtx_SET (target, mem),
6566 x = gen_rtx_VEC_SELECT (inner_mode, target,
6567 gen_rtx_PARALLEL (VOIDmode,
6568 gen_rtvec (1, const0_rtx)));
6569 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
/* One field is non-constant. Load constant then overwrite
   varying field. */
6577 rtx copy = copy_rtx (vals);
/* Load constant part of vector, substitute neighboring value for
   varying element. */
6581 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6582 rs6000_expand_vector_init (target, copy);
6584 /* Insert variable. */
6585 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6589 /* Construct the vector in memory one field at a time
6590 and load the whole vector. */
6591 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6592 for (i = 0; i < n_elts; i++)
6593 emit_move_insn (adjust_address_nv (mem, inner_mode,
6594 i * GET_MODE_SIZE (inner_mode)),
6595 XVECEXP (vals, 0, i));
6596 emit_move_insn (target, mem);
6599 /* Set field ELT of TARGET to VAL. */
6602 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6604 machine_mode mode = GET_MODE (target);
6605 machine_mode inner_mode = GET_MODE_INNER (mode);
6606 rtx reg = gen_reg_rtx (mode);
6608 int width = GET_MODE_SIZE (inner_mode);
6611 val = force_reg (GET_MODE (val), val);
6613 if (VECTOR_MEM_VSX_P (mode))
6615 rtx insn = NULL_RTX;
6616 rtx elt_rtx = GEN_INT (elt);
6618 if (mode == V2DFmode)
6619 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6621 else if (mode == V2DImode)
6622 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6624 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6626 if (mode == V4SImode)
6627 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6628 else if (mode == V8HImode)
6629 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6630 else if (mode == V16QImode)
6631 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6632 else if (mode == V4SFmode)
6633 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6643 /* Simplify setting single element vectors like V1TImode. */
6644 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6646 emit_move_insn (target, gen_lowpart (mode, val));
6650 /* Load single variable value. */
6651 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6652 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6653 x = gen_rtx_UNSPEC (VOIDmode,
6654 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6655 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6657 gen_rtx_SET (reg, mem),
6660 /* Linear sequence. */
6661 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6662 for (i = 0; i < 16; ++i)
6663 XVECEXP (mask, 0, i) = GEN_INT (i);
6665 /* Set permute mask to insert element into target. */
6666 for (i = 0; i < width; ++i)
6667 XVECEXP (mask, 0, elt*width + i)
6668 = GEN_INT (i + 0x10);
6669 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6671 if (BYTES_BIG_ENDIAN)
6672 x = gen_rtx_UNSPEC (mode,
6673 gen_rtvec (3, target, reg,
6674 force_reg (V16QImode, x)),
6678 if (TARGET_P9_VECTOR)
6679 x = gen_rtx_UNSPEC (mode,
6680 gen_rtvec (3, reg, target,
6681 force_reg (V16QImode, x)),
6685 /* Invert selector. We prefer to generate VNAND on P8 so
6686 that future fusion opportunities can kick in, but must
6687 generate VNOR elsewhere. */
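/* Both forms compute NOT x: by De Morgan, IOR (NOT x, NOT x) is
   NAND (x, x) and AND (NOT x, NOT x) is NOR (x, x), matching the vnand
   and vnor patterns respectively.  */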
6688 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6689 rtx iorx = (TARGET_P8_VECTOR
6690 ? gen_rtx_IOR (V16QImode, notx, notx)
6691 : gen_rtx_AND (V16QImode, notx, notx));
6692 rtx tmp = gen_reg_rtx (V16QImode);
6693 emit_insn (gen_rtx_SET (tmp, iorx));
6695 /* Permute with operands reversed and adjusted selector. */
6696 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6701 emit_insn (gen_rtx_SET (target, x));
6704 /* Extract field ELT from VEC into TARGET. */
6707 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6709 machine_mode mode = GET_MODE (vec);
6710 machine_mode inner_mode = GET_MODE_INNER (mode);
6713 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6720 emit_move_insn (target, gen_lowpart (TImode, vec));
6723 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6726 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6729 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6732 if (TARGET_DIRECT_MOVE_64BIT)
6734 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6740 if (TARGET_DIRECT_MOVE_64BIT)
6742 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6748 if (TARGET_DIRECT_MOVE_64BIT)
6750 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6756 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6757 && TARGET_DIRECT_MOVE_64BIT)
6759 if (GET_MODE (elt) != DImode)
6761 rtx tmp = gen_reg_rtx (DImode);
6762 convert_move (tmp, elt, 0);
6765 else if (!REG_P (elt))
6766 elt = force_reg (DImode, elt);
6771 emit_move_insn (target, gen_lowpart (TImode, vec));
6775 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6779 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6783 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6787 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6791 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6795 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6803 /* Allocate mode-sized buffer. */
6804 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6806 emit_move_insn (mem, vec);
6807 if (CONST_INT_P (elt))
6809 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6811 /* Add offset to field within buffer matching vector element. */
6812 mem = adjust_address_nv (mem, inner_mode,
6813 modulo_elt * GET_MODE_SIZE (inner_mode));
6814 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6818 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6819 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6820 rtx new_addr = gen_reg_rtx (Pmode);
6822 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6824 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6825 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6826 new_addr = change_address (mem, inner_mode, new_addr);
6827 emit_move_insn (target, new_addr);
6831 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6832 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
temporary (BASE_TMP) to fix up the address. Return the new memory address
6834 that is valid for reads or writes to a given register (SCALAR_REG). */
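/* For example, reading element 3 of a V4SImode vector stored at r9+16
   adjusts the address to r9+28 (16 + 3*4), so that an SImode access
   touches just that element (register numbers are illustrative).  */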
6837 rs6000_adjust_vec_address (rtx scalar_reg,
6841 machine_mode scalar_mode)
6843 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6844 rtx addr = XEXP (mem, 0);
6849 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6850 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6852 /* Calculate what we need to add to the address to get the element
6854 if (CONST_INT_P (element))
6855 element_offset = GEN_INT (INTVAL (element) * scalar_size);
6858 int byte_shift = exact_log2 (scalar_size);
6859 gcc_assert (byte_shift >= 0);
6861 if (byte_shift == 0)
6862 element_offset = element;
6866 if (TARGET_POWERPC64)
6867 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
6869 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
6871 element_offset = base_tmp;
6875 /* Create the new address pointing to the element within the vector. If we
6876 are adding 0, we don't have to change the address. */
6877 if (element_offset == const0_rtx)
6880 /* A simple indirect address can be converted into a reg + offset
6882 else if (REG_P (addr) || SUBREG_P (addr))
6883 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6885 /* Optimize D-FORM addresses with constant offset with a constant element, to
6886 include the element offset in the address directly. */
6887 else if (GET_CODE (addr) == PLUS)
6889 rtx op0 = XEXP (addr, 0);
6890 rtx op1 = XEXP (addr, 1);
6893 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6894 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6896 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6897 rtx offset_rtx = GEN_INT (offset);
6899 if (IN_RANGE (offset, -32768, 32767)
6900 && (scalar_size < 8 || (offset & 0x3) == 0))
6901 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6904 emit_move_insn (base_tmp, offset_rtx);
6905 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6910 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
6911 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
/* Note, ADDI requires the register being added to be a base
   register. If the register was R0, load it up into the temporary
   and do the add. */
6917 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
6919 insn = gen_add3_insn (base_tmp, op1, element_offset);
6920 gcc_assert (insn != NULL_RTX);
6925 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
6927 insn = gen_add3_insn (base_tmp, element_offset, op1);
6928 gcc_assert (insn != NULL_RTX);
6934 emit_move_insn (base_tmp, op1);
6935 emit_insn (gen_add2_insn (base_tmp, element_offset));
6938 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6944 emit_move_insn (base_tmp, addr);
6945 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6948 /* If we have a PLUS, we need to see whether the particular register class
6949 allows for D-FORM or X-FORM addressing. */
6950 if (GET_CODE (new_addr) == PLUS)
6952 rtx op1 = XEXP (new_addr, 1);
6953 addr_mask_type addr_mask;
6954 unsigned int scalar_regno = reg_or_subregno (scalar_reg);
6956 gcc_assert (HARD_REGISTER_NUM_P (scalar_regno));
6957 if (INT_REGNO_P (scalar_regno))
6958 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
6960 else if (FP_REGNO_P (scalar_regno))
6961 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
6963 else if (ALTIVEC_REGNO_P (scalar_regno))
6964 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
6969 if (REG_P (op1) || SUBREG_P (op1))
6970 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
6972 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
6975 else if (REG_P (new_addr) || SUBREG_P (new_addr))
6976 valid_addr_p = true;
6979 valid_addr_p = false;
6983 emit_move_insn (base_tmp, new_addr);
6984 new_addr = base_tmp;
6987 return change_address (mem, scalar_mode, new_addr);
6990 /* Split a variable vec_extract operation into the component instructions. */
6993 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6996 machine_mode mode = GET_MODE (src);
6997 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6998 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6999 int byte_shift = exact_log2 (scalar_size);
7001 gcc_assert (byte_shift >= 0);
7003 /* If we are given a memory address, optimize to load just the element. We
don't have to adjust the vector element number on little endian
   systems. */
7008 int num_elements = GET_MODE_NUNITS (mode);
7009 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7011 emit_insn (gen_anddi3 (element, element, num_ele_m1));
7012 gcc_assert (REG_P (tmp_gpr));
7013 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7014 tmp_gpr, scalar_mode));
7018 else if (REG_P (src) || SUBREG_P (src))
7020 int num_elements = GET_MODE_NUNITS (mode);
7021 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7022 int bit_shift = 7 - exact_log2 (num_elements);
7024 unsigned int dest_regno = reg_or_subregno (dest);
7025 unsigned int src_regno = reg_or_subregno (src);
7026 unsigned int element_regno = reg_or_subregno (element);
7028 gcc_assert (REG_P (tmp_gpr));
7030 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7031 a general purpose register. */
7032 if (TARGET_P9_VECTOR
7033 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7034 && INT_REGNO_P (dest_regno)
7035 && ALTIVEC_REGNO_P (src_regno)
7036 && INT_REGNO_P (element_regno))
7038 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7039 rtx element_si = gen_rtx_REG (SImode, element_regno);
7041 if (mode == V16QImode)
7042 emit_insn (BYTES_BIG_ENDIAN
7043 ? gen_vextublx (dest_si, element_si, src)
7044 : gen_vextubrx (dest_si, element_si, src));
7046 else if (mode == V8HImode)
7048 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7049 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7050 emit_insn (BYTES_BIG_ENDIAN
7051 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7052 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7058 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7059 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7060 emit_insn (BYTES_BIG_ENDIAN
7061 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7062 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7069 gcc_assert (REG_P (tmp_altivec));
7071 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
an XOR, otherwise we need to subtract. The shift amount is chosen so that
   VSLO will shift the element into the upper position (adding 3 to convert a
7074 byte shift into a bit shift). */
7075 if (scalar_size == 8)
7077 if (!BYTES_BIG_ENDIAN)
7079 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
/* Generate RLDIC directly to shift left 6 bits and retrieve 1
   bit. */
7087 emit_insn (gen_rtx_SET (tmp_gpr,
7088 gen_rtx_AND (DImode,
7089 gen_rtx_ASHIFT (DImode,
7096 if (!BYTES_BIG_ENDIAN)
7098 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7100 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7101 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7107 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
/* Get the value into the lower byte of the Altivec register where VSLO
   expects it. */
7112 if (TARGET_P9_VECTOR)
7113 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7114 else if (can_create_pseudo_p ())
7115 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7118 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7119 emit_move_insn (tmp_di, tmp_gpr);
7120 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7123 /* Do the VSLO to get the value into the final location. */
7127 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7131 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7136 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7137 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7138 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7139 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7142 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7150 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7151 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7152 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7153 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7155 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7156 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7157 GEN_INT (64 - bits_in_element)));
7171 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7172 selects whether the alignment is abi mandated, optional, or
7173 both abi and optional alignment. */
7176 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7178 if (how != align_opt)
7180 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7184 if (how != align_abi)
7186 if (TREE_CODE (type) == ARRAY_TYPE
7187 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7189 if (align < BITS_PER_WORD)
7190 align = BITS_PER_WORD;
7197 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7198 instructions simply ignore the low bits; VSX memory instructions
7199 are aligned to 4 or 8 bytes. */
7202 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7204 return (STRICT_ALIGNMENT
7205 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7206 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7207 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7208 && (int) align < VECTOR_ALIGN (mode)))));
7211 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7214 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7216 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7218 if (computed != 128)
7221 if (!warned && warn_psabi)
7224 inform (input_location,
7225 "the layout of aggregates containing vectors with"
7226 " %d-byte alignment has changed in GCC 5",
7227 computed / BITS_PER_UNIT);
7230 /* In current GCC there is no special case. */
7237 /* AIX increases natural record alignment to doubleword if the first
7238 field is an FP double while the FP fields remain word aligned. */
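/* For example, struct { double d; int i; } is given doubleword (64-bit)
   alignment, while struct { int i; double d; } keeps word alignment
   because its first field is not an FP double.  */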
7241 rs6000_special_round_type_align (tree type, unsigned int computed,
7242 unsigned int specified)
7244 unsigned int align = MAX (computed, specified);
7245 tree field = TYPE_FIELDS (type);
/* Skip all non-field decls. */
7248 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7249 field = DECL_CHAIN (field);
7251 if (field != NULL && field != type)
7253 type = TREE_TYPE (field);
7254 while (TREE_CODE (type) == ARRAY_TYPE)
7255 type = TREE_TYPE (type);
7257 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7258 align = MAX (align, 64);
/* Darwin increases record alignment to the natural alignment of
   the first field. */
7268 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7269 unsigned int specified)
7271 unsigned int align = MAX (computed, specified);
7273 if (TYPE_PACKED (type))
7276 /* Find the first field, looking down into aggregates. */
7278 tree field = TYPE_FIELDS (type);
/* Skip all non-field decls. */
7280 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7281 field = DECL_CHAIN (field);
7284 /* A packed field does not contribute any extra alignment. */
7285 if (DECL_PACKED (field))
7287 type = TREE_TYPE (field);
7288 while (TREE_CODE (type) == ARRAY_TYPE)
7289 type = TREE_TYPE (type);
7290 } while (AGGREGATE_TYPE_P (type));
7292 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7293 align = MAX (align, TYPE_ALIGN (type));
7298 /* Return 1 for an operand in small memory on V.4/eabi. */
7301 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7302 machine_mode mode ATTRIBUTE_UNUSED)
7307 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7310 if (DEFAULT_ABI != ABI_V4)
7313 if (SYMBOL_REF_P (op))
7316 else if (GET_CODE (op) != CONST
7317 || GET_CODE (XEXP (op, 0)) != PLUS
7318 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7319 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7324 rtx sum = XEXP (op, 0);
7325 HOST_WIDE_INT summand;
7327 /* We have to be careful here, because it is the referenced address
7328 that must be 32k from _SDA_BASE_, not just the symbol. */
7329 summand = INTVAL (XEXP (sum, 1));
7330 if (summand < 0 || summand > g_switch_value)
7333 sym_ref = XEXP (sum, 0);
7336 return SYMBOL_REF_SMALL_P (sym_ref);
7342 /* Return true if either operand is a general purpose register. */
7345 gpr_or_gpr_p (rtx op0, rtx op1)
7347 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7348 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7351 /* Return true if this is a move direct operation between GPR registers and
7352 floating point/VSX registers. */
7355 direct_move_p (rtx op0, rtx op1)
7357 if (!REG_P (op0) || !REG_P (op1))
7360 if (!TARGET_DIRECT_MOVE)
7363 int regno0 = REGNO (op0);
7364 int regno1 = REGNO (op1);
7365 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7368 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7371 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7377 /* Return true if the ADDR is an acceptable address for a quad memory
7378 operation of mode MODE (either LQ/STQ for general purpose registers, or
LXV/STXV for vector registers under ISA 3.0). STRICT is true if the
   address must pass the strict form of base register validation (hard
   registers only, no pseudos). */
7384 quad_address_p (rtx addr, machine_mode mode, bool strict)
7388 if (GET_MODE_SIZE (mode) != 16)
7391 if (legitimate_indirect_address_p (addr, strict))
7394 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7397 if (GET_CODE (addr) != PLUS)
7400 op0 = XEXP (addr, 0);
7401 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7404 op1 = XEXP (addr, 1);
7405 if (!CONST_INT_P (op1))
7408 return quad_address_offset_p (INTVAL (op1));
7411 /* Return true if this is a load or store quad operation. This function does
7412 not handle the atomic quad memory instructions. */
7415 quad_load_store_p (rtx op0, rtx op1)
7419 if (!TARGET_QUAD_MEMORY)
7422 else if (REG_P (op0) && MEM_P (op1))
7423 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7424 && quad_memory_operand (op1, GET_MODE (op1))
7425 && !reg_overlap_mentioned_p (op0, op1));
7427 else if (MEM_P (op0) && REG_P (op1))
7428 ret = (quad_memory_operand (op0, GET_MODE (op0))
7429 && quad_int_reg_operand (op1, GET_MODE (op1)));
7434 if (TARGET_DEBUG_ADDR)
7436 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7437 ret ? "true" : "false");
7438 debug_rtx (gen_rtx_SET (op0, op1));
7444 /* Given an address, return a constant offset term if one exists. */
7447 address_offset (rtx op)
7449 if (GET_CODE (op) == PRE_INC
7450 || GET_CODE (op) == PRE_DEC)
7452 else if (GET_CODE (op) == PRE_MODIFY
7453 || GET_CODE (op) == LO_SUM)
7456 if (GET_CODE (op) == CONST)
7459 if (GET_CODE (op) == PLUS)
7462 if (CONST_INT_P (op))
7468 /* Return true if the MEM operand is a memory operand suitable for use
7469 with a (full width, possibly multiple) gpr load/store. On
7470 powerpc64 this means the offset must be divisible by 4.
7471 Implements 'Y' constraint.
7473 Accept direct, indexed, offset, lo_sum and tocref. Since this is
a constraint function we know the operand has satisfied a suitable
   memory predicate.
7477 Offsetting a lo_sum should not be allowed, except where we know by
7478 alignment that a 32k boundary is not crossed. Note that by
7479 "offsetting" here we mean a further offset to access parts of the
7480 MEM. It's fine to have a lo_sum where the inner address is offset
7481 from a sym, since the same sym+offset will appear in the high part
7482 of the address calculation. */
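/* For example, on powerpc64 a DImode access at r3+6 is rejected, since
   ld/std use DS-form offsets that must be a multiple of 4, while r3+8
   is acceptable (addresses are illustrative).  */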
7485 mem_operand_gpr (rtx op, machine_mode mode)
7487 unsigned HOST_WIDE_INT offset;
7489 rtx addr = XEXP (op, 0);
7491 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7493 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7494 && mode_supports_pre_incdec_p (mode)
7495 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7498 /* Don't allow non-offsettable addresses. See PRs 83969 and 84279. */
7499 if (!rs6000_offsettable_memref_p (op, mode, false))
7502 op = address_offset (addr);
7506 offset = INTVAL (op);
7507 if (TARGET_POWERPC64 && (offset & 3) != 0)
7510 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7514 if (GET_CODE (addr) == LO_SUM)
7515 /* For lo_sum addresses, we must allow any offset except one that
7516 causes a wrap, so test only the low 16 bits. */
7517 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7519 return offset + 0x8000 < 0x10000u - extra;
7522 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7523 enforce an offset divisible by 4 even for 32-bit. */
7526 mem_operand_ds_form (rtx op, machine_mode mode)
7528 unsigned HOST_WIDE_INT offset;
7530 rtx addr = XEXP (op, 0);
7532 if (!offsettable_address_p (false, mode, addr))
7535 op = address_offset (addr);
7539 offset = INTVAL (op);
7540 if ((offset & 3) != 0)
7543 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7547 if (GET_CODE (addr) == LO_SUM)
7548 /* For lo_sum addresses, we must allow any offset except one that
7549 causes a wrap, so test only the low 16 bits. */
7550 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7552 return offset + 0x8000 < 0x10000u - extra;
7555 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7558 reg_offset_addressing_ok_p (machine_mode mode)
7572 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7573 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7574 a vector mode, if we want to use the VSX registers to move it around,
7575 we need to restrict ourselves to reg+reg addressing. Similarly for
IEEE 128-bit floating point that is passed in a single vector
   register. */
7578 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7579 return mode_supports_dq_form (mode);
7583 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7584 addressing for the LFIWZX and STFIWX instructions. */
7585 if (TARGET_NO_SDMODE_STACK)
7597 virtual_stack_registers_memory_p (rtx op)
7602 regnum = REGNO (op);
7604 else if (GET_CODE (op) == PLUS
7605 && REG_P (XEXP (op, 0))
7606 && CONST_INT_P (XEXP (op, 1)))
7607 regnum = REGNO (XEXP (op, 0));
7612 return (regnum >= FIRST_VIRTUAL_REGISTER
7613 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
/* Return true if a MODE sized memory access to OP plus OFFSET
7617 is known to not straddle a 32k boundary. This function is used
7618 to determine whether -mcmodel=medium code can use TOC pointer
7619 relative addressing for OP. This means the alignment of the TOC
pointer must also be taken into account, and unfortunately that is
   only 8 bytes. */
7623 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7624 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7628 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7632 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7634 if (!SYMBOL_REF_P (op))
/* ISA 3.0 vector d-form addressing is restricted, don't allow
   SYMBOL_REF. */
7639 if (mode_supports_dq_form (mode))
7642 dsize = GET_MODE_SIZE (mode);
7643 decl = SYMBOL_REF_DECL (op);
7649 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7650 replacing memory addresses with an anchor plus offset. We
7651 could find the decl by rummaging around in the block->objects
7652 VEC for the given offset but that seems like too much work. */
7653 dalign = BITS_PER_UNIT;
7654 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7655 && SYMBOL_REF_ANCHOR_P (op)
7656 && SYMBOL_REF_BLOCK (op) != NULL)
7658 struct object_block *block = SYMBOL_REF_BLOCK (op);
7660 dalign = block->alignment;
7661 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7663 else if (CONSTANT_POOL_ADDRESS_P (op))
7665 /* It would be nice to have get_pool_align().. */
7666 machine_mode cmode = get_pool_mode (op);
7668 dalign = GET_MODE_ALIGNMENT (cmode);
7671 else if (DECL_P (decl))
7673 dalign = DECL_ALIGN (decl);
7677 /* Allow BLKmode when the entire object is known to not
7678 cross a 32k boundary. */
7679 if (!DECL_SIZE_UNIT (decl))
7682 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7685 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7689 dalign /= BITS_PER_UNIT;
7690 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7691 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7692 return dalign >= dsize;
7698 /* Find how many bits of the alignment we know for this access. */
7699 dalign /= BITS_PER_UNIT;
7700 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7701 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7703 lsb = offset & -offset;
7707 return dalign >= dsize;
7711 constant_pool_expr_p (rtx op)
7715 split_const (op, &base, &offset);
7716 return (SYMBOL_REF_P (base)
7717 && CONSTANT_POOL_ADDRESS_P (base)
7718 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7721 /* These are only used to pass through from print_operand/print_operand_address
7722 to rs6000_output_addr_const_extra over the intervening function
7723 output_addr_const which is not target code. */
7724 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7726 /* Return true if OP is a toc pointer relative address (the output
7727 of create_TOC_reference). If STRICT, do not match non-split
7728 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7729 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7730 TOCREL_OFFSET_RET respectively. */
7733 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7734 const_rtx *tocrel_offset_ret)
7739 if (TARGET_CMODEL != CMODEL_SMALL)
/* When strict, ensure we have everything tidy. */
7743 && !(GET_CODE (op) == LO_SUM
7744 && REG_P (XEXP (op, 0))
7745 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7748 /* When not strict, allow non-split TOC addresses and also allow
7749 (lo_sum (high ..)) TOC addresses created during reload. */
7750 if (GET_CODE (op) == LO_SUM)
7754 const_rtx tocrel_base = op;
7755 const_rtx tocrel_offset = const0_rtx;
7757 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7759 tocrel_base = XEXP (op, 0);
7760 tocrel_offset = XEXP (op, 1);
7763 if (tocrel_base_ret)
7764 *tocrel_base_ret = tocrel_base;
7765 if (tocrel_offset_ret)
7766 *tocrel_offset_ret = tocrel_offset;
7768 return (GET_CODE (tocrel_base) == UNSPEC
7769 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7770 && REG_P (XVECEXP (tocrel_base, 0, 1))
7771 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7774 /* Return true if X is a constant pool address, and also for cmodel=medium
7775 if X is a toc-relative address known to be offsettable within MODE. */
7778 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7781 const_rtx tocrel_base, tocrel_offset;
7782 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7783 && (TARGET_CMODEL != CMODEL_MEDIUM
7784 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7786 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7787 INTVAL (tocrel_offset), mode)));
7791 legitimate_small_data_p (machine_mode mode, rtx x)
7793 return (DEFAULT_ABI == ABI_V4
7794 && !flag_pic && !TARGET_TOC
7795 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7796 && small_data_operand (x, mode));
7800 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7801 bool strict, bool worst_case)
7803 unsigned HOST_WIDE_INT offset;
7806 if (GET_CODE (x) != PLUS)
7808 if (!REG_P (XEXP (x, 0)))
7810 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7812 if (mode_supports_dq_form (mode))
7813 return quad_address_p (x, mode, strict);
7814 if (!reg_offset_addressing_ok_p (mode))
7815 return virtual_stack_registers_memory_p (x);
7816 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7818 if (!CONST_INT_P (XEXP (x, 1)))
7821 offset = INTVAL (XEXP (x, 1));
7828 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7830 if (VECTOR_MEM_VSX_P (mode))
7835 if (!TARGET_POWERPC64)
7837 else if (offset & 3)
7850 if (!TARGET_POWERPC64)
7852 else if (offset & 3)
7861 return offset < 0x10000 - extra;
7865 legitimate_indexed_address_p (rtx x, int strict)
7869 if (GET_CODE (x) != PLUS)
7875 return (REG_P (op0) && REG_P (op1)
7876 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7877 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7878 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7879 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7883 avoiding_indexed_address_p (machine_mode mode)
7885 /* Avoid indexed addressing for modes that have non-indexed
7886 load/store instruction forms. */
7887 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7891 legitimate_indirect_address_p (rtx x, int strict)
7893 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
7897 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7899 if (!TARGET_MACHO || !flag_pic
7900 || mode != SImode || !MEM_P (x))
7904 if (GET_CODE (x) != LO_SUM)
7906 if (!REG_P (XEXP (x, 0)))
7908 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7912 return CONSTANT_P (x);
7916 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
7918 if (GET_CODE (x) != LO_SUM)
7920 if (!REG_P (XEXP (x, 0)))
7922 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7924 /* quad word addresses are restricted, and we can't use LO_SUM. */
7925 if (mode_supports_dq_form (mode))
7929 if (TARGET_ELF || TARGET_MACHO)
7933 if (DEFAULT_ABI == ABI_V4 && flag_pic)
7935 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
7936 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
recognizes some LO_SUM addresses as valid although this
   function says the opposite. In most cases LRA can, through different
   transformations, generate correct code for address reloads; it is
   only some LO_SUM cases that it cannot manage. So we need to add
   code here saying that those addresses are still valid. */
7942 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
7943 && small_toc_ref (x, VOIDmode));
7944 if (TARGET_TOC && ! large_toc_ok)
7946 if (GET_MODE_NUNITS (mode) != 1)
7948 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7949 && !(/* ??? Assume floating point reg based on mode? */
7950 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
7953 return CONSTANT_P (x) || large_toc_ok;
7960 /* Try machine-dependent ways of modifying an illegitimate address
7961 to be legitimate. If we find one, return the new, valid address.
7962 This is used from only one place: `memory_address' in explow.c.
7964 OLDX is the address as it was before break_out_memory_refs was
7965 called. In some cases it is useful to look at this to decide what
7968 It is always safe for this function to do nothing. It exists to
7969 recognize opportunities to optimize the output.
7971 On RS/6000, first check for the sum of a register with a constant
7972 integer that is out of range. If so, generate code to add the
7973 constant with the low-order 16 bits masked to the register and force
7974 this result into another register (this can be done with `cau').
7975 Then generate an address of REG+(CONST&0xffff), allowing for the
7976 possibility of bit 16 being a one.
7978 Then check for the sum of a register and something not constant, try to
7979 load the other things into a register and return the sum. */
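/* For example, the out-of-range address r3+0x12345 is split as
   "addis r4,r3,1" (adding 0x10000) followed by an access at r4+0x2345,
   keeping the displacement within the signed 16-bit range (registers
   are illustrative).  */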
7982 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
7987 if (!reg_offset_addressing_ok_p (mode)
7988 || mode_supports_dq_form (mode))
7990 if (virtual_stack_registers_memory_p (x))
7993 /* In theory we should not be seeing addresses of the form reg+0,
7994 but just in case it is generated, optimize it away. */
7995 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
7996 return force_reg (Pmode, XEXP (x, 0));
7998 /* For TImode with load/store quad, restrict addresses to just a single
7999 pointer, so it works with both GPRs and VSX registers. */
8000 /* Make sure both operands are registers. */
8001 else if (GET_CODE (x) == PLUS
8002 && (mode != TImode || !TARGET_VSX))
8003 return gen_rtx_PLUS (Pmode,
8004 force_reg (Pmode, XEXP (x, 0)),
8005 force_reg (Pmode, XEXP (x, 1)));
8007 return force_reg (Pmode, x);
8009 if (SYMBOL_REF_P (x))
8011 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8013 return rs6000_legitimize_tls_address (x, model);
8025 /* As in legitimate_offset_address_p we do not assume
8026 worst-case. The mode here is just a hint as to the registers
8027 used. A TImode is usually in gprs, but may actually be in
8028 fprs. Leave worst-case scenario for reload to handle via
8029 insn constraints. PTImode is only GPRs. */
8036 if (GET_CODE (x) == PLUS
8037 && REG_P (XEXP (x, 0))
8038 && CONST_INT_P (XEXP (x, 1))
8039 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8040 >= 0x10000 - extra))
8042 HOST_WIDE_INT high_int, low_int;
8044 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8045 if (low_int >= 0x8000 - extra)
8047 high_int = INTVAL (XEXP (x, 1)) - low_int;
8048 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8049 GEN_INT (high_int)), 0);
8050 return plus_constant (Pmode, sum, low_int);
8052 else if (GET_CODE (x) == PLUS
8053 && REG_P (XEXP (x, 0))
8054 && !CONST_INT_P (XEXP (x, 1))
8055 && GET_MODE_NUNITS (mode) == 1
8056 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8057 || (/* ??? Assume floating point reg based on mode? */
8058 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8059 && !avoiding_indexed_address_p (mode))
8061 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8062 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8064 else if ((TARGET_ELF
8066 || !MACHO_DYNAMIC_NO_PIC_P
8073 && !CONST_WIDE_INT_P (x)
8074 && !CONST_DOUBLE_P (x)
8076 && GET_MODE_NUNITS (mode) == 1
8077 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8078 || (/* ??? Assume floating point reg based on mode? */
8079 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8081 rtx reg = gen_reg_rtx (Pmode);
8083 emit_insn (gen_elf_high (reg, x));
8085 emit_insn (gen_macho_high (reg, x));
8086 return gen_rtx_LO_SUM (Pmode, reg, x);
8090 && constant_pool_expr_p (x)
8091 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8092 return create_TOC_reference (x, NULL_RTX);
8097 /* Debug version of rs6000_legitimize_address. */
8099 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8105 ret = rs6000_legitimize_address (x, oldx, mode);
8106 insns = get_insns ();
8112 "\nrs6000_legitimize_address: mode %s, old code %s, "
8113 "new code %s, modified\n",
8114 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8115 GET_RTX_NAME (GET_CODE (ret)));
8117 fprintf (stderr, "Original address:\n");
8120 fprintf (stderr, "oldx:\n");
8123 fprintf (stderr, "New address:\n");
8128 fprintf (stderr, "Insns added:\n");
8129 debug_rtx_list (insns, 20);
8135 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8136 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8147 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8148 We need to emit DTP-relative relocations. */
8150 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8152 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8157 fputs ("\t.long\t", file);
8160 fputs (DOUBLE_INT_ASM_OP, file);
8165 output_addr_const (file, x);
8167 fputs ("@dtprel+0x8000", file);
8168 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8170 switch (SYMBOL_REF_TLS_MODEL (x))
8174 case TLS_MODEL_LOCAL_EXEC:
8175 fputs ("@le", file);
8177 case TLS_MODEL_INITIAL_EXEC:
8178 fputs ("@ie", file);
8180 case TLS_MODEL_GLOBAL_DYNAMIC:
8181 case TLS_MODEL_LOCAL_DYNAMIC:
/* Return true if X is a symbol that refers to real (rather than emulated)
   TLS. */
8194 rs6000_real_tls_symbol_ref_p (rtx x)
8196 return (SYMBOL_REF_P (x)
8197 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8200 /* In the name of slightly smaller debug output, and to cater to
8201 general assembler lossage, recognize various UNSPEC sequences
8202 and turn them back into a direct symbol reference. */
8205 rs6000_delegitimize_address (rtx orig_x)
8209 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8210 orig_x = XVECEXP (orig_x, 0, 0);
8212 orig_x = delegitimize_mem_from_attrs (orig_x);
8219 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8223 if (GET_CODE (y) == PLUS
8224 && GET_MODE (y) == Pmode
8225 && CONST_INT_P (XEXP (y, 1)))
8227 offset = XEXP (y, 1);
8231 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8233 y = XVECEXP (y, 0, 0);
8236 /* Do not associate thread-local symbols with the original
8237 constant pool symbol. */
8240 && CONSTANT_POOL_ADDRESS_P (y)
8241 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8245 if (offset != NULL_RTX)
8246 y = gen_rtx_PLUS (Pmode, y, offset);
8247 if (!MEM_P (orig_x))
8250 return replace_equiv_address_nv (orig_x, y);
8254 && GET_CODE (orig_x) == LO_SUM
8255 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8257 y = XEXP (XEXP (orig_x, 1), 0);
8258 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8259 return XVECEXP (y, 0, 0);
8265 /* Return true if X shouldn't be emitted into the debug info.
8266 The linker doesn't like .toc section references from
8267 .debug_* sections, so reject .toc section symbols. */
8270 rs6000_const_not_ok_for_debug_p (rtx x)
8272 if (GET_CODE (x) == UNSPEC)
8274 if (SYMBOL_REF_P (x)
8275 && CONSTANT_POOL_ADDRESS_P (x))
8277 rtx c = get_pool_constant (x);
8278 machine_mode cmode = get_pool_mode (x);
8279 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8286 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8289 rs6000_legitimate_combined_insn (rtx_insn *insn)
8291 int icode = INSN_CODE (insn);
8293 /* Reject creating doloop insns. Combine should not be allowed
8294 to create these for a number of reasons:
8295 1) In a nested loop, if combine creates one of these in an
8296 outer loop and the register allocator happens to allocate ctr
8297 to the outer loop insn, then the inner loop can't use ctr.
8298 Inner loops ought to be more highly optimized.
8299 2) Combine often wants to create one of these from what was
8300 originally a three insn sequence, first combining the three
8301 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8302 allocated ctr, the splitter takes us back to the three insn
8303 sequence. It's better to stop combine at the two insn
8305 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8306 insns, the register allocator sometimes uses floating point
8307 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8308 jump insn and output reloads are not implemented for jumps,
8309 the ctrsi/ctrdi splitters need to handle all possible cases.
8310 That's a pain, and it gets to be seriously difficult when a
8311 splitter that runs after reload needs memory to transfer from
8312 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8313 for the difficult case. It's better to not create problems
8314 in the first place. */
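/* A sketch of the sort of loop at issue: the patterns rejected below
   are the counted-loop branches that ultimately assemble to

       mtctr r5                 # move the trip count into CTR
   L1: ...
       bdnz  L1                 # decrement CTR, branch if nonzero

   so refusing them here keeps combine from committing to CTR early.  */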
8315 if (icode != CODE_FOR_nothing
8316 && (icode == CODE_FOR_bdz_si
8317 || icode == CODE_FOR_bdz_di
8318 || icode == CODE_FOR_bdnz_si
8319 || icode == CODE_FOR_bdnz_di
8320 || icode == CODE_FOR_bdztf_si
8321 || icode == CODE_FOR_bdztf_di
8322 || icode == CODE_FOR_bdnztf_si
8323 || icode == CODE_FOR_bdnztf_di))
8329 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8331 static GTY(()) rtx rs6000_tls_symbol;
8333 rs6000_tls_get_addr (void)
8335 if (!rs6000_tls_symbol)
8336 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8338 return rs6000_tls_symbol;
8341 /* Construct the SYMBOL_REF for TLS GOT references. */
8343 static GTY(()) rtx rs6000_got_symbol;
8345 rs6000_got_sym (void)
8347 if (!rs6000_got_symbol)
8349 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8350 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8351 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8354 return rs6000_got_symbol;
8357 /* AIX Thread-Local Address support. */
8360 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8362 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8366 name = XSTR (addr, 0);
8367 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8368 or the symbol will be in the TLS private data section. */
8369 if (name[strlen (name) - 1] != ']'
8370 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8371 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8373 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8374 strcpy (tlsname, name);
8376 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8377 tlsaddr = copy_rtx (addr);
8378 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8383 /* Place addr into TOC constant pool. */
8384 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8386 /* Output the TOC entry and create the MEM referencing the value. */
8387 if (constant_pool_expr_p (XEXP (sym, 0))
8388 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8390 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8391 mem = gen_const_mem (Pmode, tocref);
8392 set_mem_alias_set (mem, get_TOC_alias_set ());
8397 /* Use global-dynamic for local-dynamic. */
8398 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8399 || model == TLS_MODEL_LOCAL_DYNAMIC)
8401 /* Create new TOC reference for @m symbol. */
8402 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8403 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8404 strcpy (tlsname, "*LCM");
8405 strcat (tlsname, name + 3);
8406 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8407 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8408 tocref = create_TOC_reference (modaddr, NULL_RTX);
8409 rtx modmem = gen_const_mem (Pmode, tocref);
8410 set_mem_alias_set (modmem, get_TOC_alias_set ());
8412 rtx modreg = gen_reg_rtx (Pmode);
8413 emit_insn (gen_rtx_SET (modreg, modmem));
8415 tmpreg = gen_reg_rtx (Pmode);
8416 emit_insn (gen_rtx_SET (tmpreg, mem));
8418 dest = gen_reg_rtx (Pmode);
8420 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8422 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8425 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8426 else if (TARGET_32BIT)
8428 tlsreg = gen_reg_rtx (SImode);
8429 emit_insn (gen_tls_get_tpointer (tlsreg));
8432 tlsreg = gen_rtx_REG (DImode, 13);
8434 /* Load the TOC value into temporary register. */
8435 tmpreg = gen_reg_rtx (Pmode);
8436 emit_insn (gen_rtx_SET (tmpreg, mem));
8437 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8438 gen_rtx_MINUS (Pmode, addr, tlsreg));
8440 /* Add TOC symbol value to TLS pointer. */
8441 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8446 /* Output arg setup instructions for a !TARGET_TLS_MARKERS
8447 __tls_get_addr call. */
8450 rs6000_output_tlsargs (rtx *operands)
8452 /* Set up operands for output_asm_insn, without modifying OPERANDS. */
8455 /* The set dest of the call, ie. r3, which is also the first arg reg. */
8456 op[0] = operands[0];
8457 /* The TLS symbol from global_tlsarg stashed as CALL operand 2. */
8458 op[1] = XVECEXP (operands[2], 0, 0);
8459 if (XINT (operands[2], 1) == UNSPEC_TLSGD)
8461 /* The GOT register. */
8462 op[2] = XVECEXP (operands[2], 0, 1);
8463 if (TARGET_CMODEL != CMODEL_SMALL)
8464 output_asm_insn ("addis %0,%2,%1@got@tlsgd@ha\n\t"
8465 "addi %0,%0,%1@got@tlsgd@l", op);
8467 output_asm_insn ("addi %0,%2,%1@got@tlsgd", op);
8469 else if (XINT (operands[2], 1) == UNSPEC_TLSLD)
8471 if (TARGET_CMODEL != CMODEL_SMALL)
8472 output_asm_insn ("addis %0,%1,%&@got@tlsld@ha\n\t"
8473 "addi %0,%0,%&@got@tlsld@l", op);
8475 output_asm_insn ("addi %0,%1,%&@got@tlsld", op);
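/* A sketch of the overall small-code-model global-dynamic sequence
   (the GOT pointer register is an assumption here, typically r30 for
   32-bit SVR4 PIC); the addi comes from this function, the call from
   the surrounding call pattern:

       addi r3, r30, x@got@tlsgd
       bl   __tls_get_addr      # address of x comes back in r3       */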
8481 /* Passes the tls arg value for global dynamic and local dynamic
8482 emit_library_call_value in rs6000_legitimize_tls_address to
8483 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8484 marker relocs put on __tls_get_addr calls. */
8485 static rtx global_tlsarg;
8487 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8488 this (thread-local) address. */
8491 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8496 return rs6000_legitimize_tls_address_aix (addr, model);
8498 dest = gen_reg_rtx (Pmode);
8499 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
8505 tlsreg = gen_rtx_REG (Pmode, 13);
8506 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8510 tlsreg = gen_rtx_REG (Pmode, 2);
8511 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8515 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8519 tmp = gen_reg_rtx (Pmode);
8522 tlsreg = gen_rtx_REG (Pmode, 13);
8523 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8527 tlsreg = gen_rtx_REG (Pmode, 2);
8528 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8532 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8534 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8539 rtx got, tga, tmp1, tmp2;
8541 /* We currently use relocations like @got@tlsgd for tls, which
8542 means the linker will handle allocation of tls entries, placing
8543 them in the .got section. So use a pointer to the .got section,
8544 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8545 or to secondary GOT sections used by 32-bit -fPIC. */
8547 got = gen_rtx_REG (Pmode, 2);
8551 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8554 rtx gsym = rs6000_got_sym ();
8555 got = gen_reg_rtx (Pmode);
8557 rs6000_emit_move (got, gsym, Pmode);
8562 tmp1 = gen_reg_rtx (Pmode);
8563 tmp2 = gen_reg_rtx (Pmode);
8564 mem = gen_const_mem (Pmode, tmp1);
8565 lab = gen_label_rtx ();
8566 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8567 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8568 if (TARGET_LINK_STACK)
8569 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8570 emit_move_insn (tmp2, mem);
8571 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8572 set_unique_reg_note (last, REG_EQUAL, gsym);
8577 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8579 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8581 tga = rs6000_tls_get_addr ();
8582 global_tlsarg = arg;
8583 if (TARGET_TLS_MARKERS)
8585 rtx argreg = gen_rtx_REG (Pmode, 3);
8586 emit_insn (gen_rtx_SET (argreg, arg));
8587 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
8591 emit_library_call_value (tga, dest, LCT_CONST, Pmode);
8592 global_tlsarg = NULL_RTX;
8594 /* Make a note so that the result of this call can be CSEd. */
8595 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8596 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8597 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8599 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8601 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8602 tga = rs6000_tls_get_addr ();
8603 tmp1 = gen_reg_rtx (Pmode);
8604 global_tlsarg = arg;
8605 if (TARGET_TLS_MARKERS)
8607 rtx argreg = gen_rtx_REG (Pmode, 3);
8608 emit_insn (gen_rtx_SET (argreg, arg));
8609 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
8613 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode);
8614 global_tlsarg = NULL_RTX;
8616 /* Make a note so that the result of this call can be CSEd. */
8617 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8618 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8619 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8621 if (rs6000_tls_size == 16)
8624 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8626 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8628 else if (rs6000_tls_size == 32)
8630 tmp2 = gen_reg_rtx (Pmode);
8632 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8634 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8637 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8639 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8643 tmp2 = gen_reg_rtx (Pmode);
8645 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8647 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8649 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8655 /* IE, or 64-bit offset LE. */
8656 tmp2 = gen_reg_rtx (Pmode);
8658 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8660 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8663 insn = gen_tls_tls_64 (dest, tmp2, addr);
8665 insn = gen_tls_tls_32 (dest, tmp2, addr);
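/* As a sketch of the simplest case handled above: local-exec with the
   default -mtls-size=16 on 64-bit reduces the entire computation to

       addi rD, r13, x@tprel

   with r13 holding the thread pointer.  */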
8673 /* Only create the global variable for the stack protect guard if we are using
8674 the global flavor of that guard. */
8676 rs6000_init_stack_protect_guard (void)
8678 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8679 return default_stack_protect_guard ();
8684 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8687 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8689 if (GET_CODE (x) == HIGH
8690 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8693 /* A TLS symbol in the TOC cannot contain a sum. */
8694 if (GET_CODE (x) == CONST
8695 && GET_CODE (XEXP (x, 0)) == PLUS
8696 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8697 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8700 /* Do not place an ELF TLS symbol in the constant pool. */
8701 return TARGET_ELF && tls_referenced_p (x);
8704 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8705 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8706 can be addressed relative to the toc pointer. */
8709 use_toc_relative_ref (rtx sym, machine_mode mode)
8711 return ((constant_pool_expr_p (sym)
8712 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8713 get_pool_mode (sym)))
8714 || (TARGET_CMODEL == CMODEL_MEDIUM
8715 && SYMBOL_REF_LOCAL_P (sym)
8716 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
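/* E.g. with -mcmodel=medium, a suitably aligned local symbol accepted
   here is then addressed relative to the TOC pointer in r2, roughly

       addis r9, r2, x@toc@ha
       addi  r9, r9, x@toc@l

   rather than through a load from the TOC.  */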
8719 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8720 that is a valid memory address for an instruction.
8721 The MODE argument is the machine mode for the MEM expression
8722 that wants to use this address.
8724 On the RS/6000, there are four valid forms of address: a SYMBOL_REF that
8725 refers to a constant pool entry of an address (or the sum of it
8726 plus a constant), a short (16-bit signed) constant plus a register,
8727 the sum of two registers, or a register indirect, possibly with an
8728 auto-increment. For DFmode, DDmode and DImode with a constant plus
8729 register, we must ensure that both words are addressable, or that we
8730 are on PowerPC64 with a word-aligned offset.
8732 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8733 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8734 because adjacent memory cells are accessed by adding word-sized offsets
8735 during assembly output. */
8737 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8739 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8740 bool quad_offset_p = mode_supports_dq_form (mode);
8742 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8743 if (VECTOR_MEM_ALTIVEC_P (mode)
8744 && GET_CODE (x) == AND
8745 && CONST_INT_P (XEXP (x, 1))
8746 && INTVAL (XEXP (x, 1)) == -16)
8749 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8751 if (legitimate_indirect_address_p (x, reg_ok_strict))
8754 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8755 && mode_supports_pre_incdec_p (mode)
8756 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8758 /* Handle restricted vector d-form offsets in ISA 3.0. */
8761 if (quad_address_p (x, mode, reg_ok_strict))
8764 else if (virtual_stack_registers_memory_p (x))
8767 else if (reg_offset_p)
8769 if (legitimate_small_data_p (mode, x))
8771 if (legitimate_constant_pool_address_p (x, mode,
8772 reg_ok_strict || lra_in_progress))
8776 /* For TImode, if we have TImode in VSX registers, only allow register
8777 indirect addresses. This will allow the values to go in either GPRs
8778 or VSX registers without reloading. The vector types would tend to
8779 go into VSX registers, so we allow REG+REG, while TImode seems
8780 somewhat split, in that some uses are GPR based, and some VSX based. */
8781 /* FIXME: We could loosen this by changing the following to
8782 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8783 but currently we cannot allow REG+REG addressing for TImode. See
8784 PR72827 for complete details on how this ends up hoodwinking DSE. */
8785 if (mode == TImode && TARGET_VSX)
8787 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8790 && GET_CODE (x) == PLUS
8791 && REG_P (XEXP (x, 0))
8792 && (XEXP (x, 0) == virtual_stack_vars_rtx
8793 || XEXP (x, 0) == arg_pointer_rtx)
8794 && CONST_INT_P (XEXP (x, 1)))
8796 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8798 if (!FLOAT128_2REG_P (mode)
8799 && (TARGET_HARD_FLOAT
8801 || (mode != DFmode && mode != DDmode))
8802 && (TARGET_POWERPC64 || mode != DImode)
8803 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8805 && !avoiding_indexed_address_p (mode)
8806 && legitimate_indexed_address_p (x, reg_ok_strict))
8808 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8809 && mode_supports_pre_modify_p (mode)
8810 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8811 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8812 reg_ok_strict, false)
8813 || (!avoiding_indexed_address_p (mode)
8814 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8815 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8817 if (reg_offset_p && !quad_offset_p
8818 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
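/* For illustration, a few addresses the checks above accept for SImode
   (assuming 64-bit code and non-strict checking):

       (reg r9)                              # register indirect
       (plus (reg r9) (const_int -4))        # reg + 16-bit offset
       (plus (reg r9) (reg r10))             # indexed form
       (lo_sum (reg r9) (symbol_ref "x"))    # low sum, paired with lis

   plus the PRE_INC/PRE_DEC/PRE_MODIFY forms when TARGET_UPDATE.  */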
8823 /* Debug version of rs6000_legitimate_address_p. */
8825 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8828 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8830 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8831 "strict = %d, reload = %s, code = %s\n",
8832 ret ? "true" : "false",
8833 GET_MODE_NAME (mode),
8835 (reload_completed ? "after" : "before"),
8836 GET_RTX_NAME (GET_CODE (x)));
8842 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8845 rs6000_mode_dependent_address_p (const_rtx addr,
8846 addr_space_t as ATTRIBUTE_UNUSED)
8848 return rs6000_mode_dependent_address_ptr (addr);
8851 /* Go to LABEL if ADDR (a legitimate address expression)
8852 has an effect that depends on the machine mode it is used for.
8854 On the RS/6000 this is true of all integral offsets (since AltiVec
8855 and VSX modes don't allow them) and of any pre-increment or decrement.
8857 ??? Except that due to conceptual problems in offsettable_address_p
8858 we can't really report the problems of integral offsets. So leave
8859 this assuming that the adjustable offset must be valid for the
8860 sub-words of a TFmode operand, which is what we had before. */
8863 rs6000_mode_dependent_address (const_rtx addr)
8865 switch (GET_CODE (addr))
8868 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8869 is considered a legitimate address before reload, so there
8870 are no offset restrictions in that case. Note that this
8871 condition is safe in strict mode because any address involving
8872 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8873 been rejected as illegitimate. */
8874 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8875 && XEXP (addr, 0) != arg_pointer_rtx
8876 && CONST_INT_P (XEXP (addr, 1)))
8878 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8879 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
8884 /* Anything in the constant pool is sufficiently aligned that
8885 all bytes have the same high part address. */
8886 return !legitimate_constant_pool_address_p (addr, QImode, false);
8888 /* Auto-increment cases are now treated generically in recog.c. */
8890 return TARGET_UPDATE;
8892 /* AND is only allowed in Altivec loads. */
8903 /* Debug version of rs6000_mode_dependent_address. */
8905 rs6000_debug_mode_dependent_address (const_rtx addr)
8907 bool ret = rs6000_mode_dependent_address (addr);
8909 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8910 ret ? "true" : "false");
8916 /* Implement FIND_BASE_TERM. */
8919 rs6000_find_base_term (rtx op)
8924 if (GET_CODE (base) == CONST)
8925 base = XEXP (base, 0);
8926 if (GET_CODE (base) == PLUS)
8927 base = XEXP (base, 0);
8928 if (GET_CODE (base) == UNSPEC)
8929 switch (XINT (base, 1))
8932 case UNSPEC_MACHOPIC_OFFSET:
8933 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8934 for aliasing purposes. */
8935 return XVECEXP (base, 0, 0);
8941 /* More elaborate version of recog's offsettable_memref_p predicate
8942 that works around the ??? note of rs6000_mode_dependent_address.
8943 In particular it accepts
8945 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8947 in 32-bit mode, which the recog predicate rejects.
8950 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
8957 /* First mimic offsettable_memref_p. */
8958 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
8961 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
8962 the latter predicate knows nothing about the mode of the memory
8963 reference and, therefore, assumes that it is the largest supported
8964 mode (TFmode). As a consequence, legitimate offsettable memory
8965 references are rejected. rs6000_legitimate_offset_address_p contains
8966 the correct logic for the PLUS case of rs6000_mode_dependent_address,
8967 at least with a little bit of help here given that we know the
8968 actual registers used. */
8969 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
8970 || GET_MODE_SIZE (reg_mode) == 4);
8971 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
8972 strict, worst_case);
8975 /* Determine the reassociation width to be used in reassociate_bb.
8976 This takes into account how many parallel operations we
8977 can actually do of a given type, and also the latency.
8981 vect add/sub/mul 2/cycle
8982 fp add/sub/mul 2/cycle
8987 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
8990 switch (rs6000_tune)
8992 case PROCESSOR_POWER8:
8993 case PROCESSOR_POWER9:
8994 case PROCESSOR_FUTURE:
8995 if (DECIMAL_FLOAT_MODE_P (mode))
8997 if (VECTOR_MODE_P (mode))
8999 if (INTEGRAL_MODE_P (mode))
9001 if (FLOAT_MODE_P (mode))
9010 /* Change register usage conditional on target flags. */
9012 rs6000_conditional_register_usage (void)
9016 if (TARGET_DEBUG_TARGET)
9017 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9019 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9021 fixed_regs[13] = call_used_regs[13]
9022 = call_really_used_regs[13] = 1;
9024 /* Conditionally disable FPRs. */
9025 if (TARGET_SOFT_FLOAT)
9026 for (i = 32; i < 64; i++)
9027 fixed_regs[i] = call_used_regs[i]
9028 = call_really_used_regs[i] = 1;
9030 /* The TOC register is not killed across calls in a way that is
9031 visible to the compiler. */
9032 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9033 call_really_used_regs[2] = 0;
9035 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9036 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9038 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9039 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9040 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9041 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9043 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9044 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9045 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9046 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9048 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9049 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9050 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9052 if (!TARGET_ALTIVEC && !TARGET_VSX)
9054 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9055 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9056 call_really_used_regs[VRSAVE_REGNO] = 1;
9059 if (TARGET_ALTIVEC || TARGET_VSX)
9060 global_regs[VSCR_REGNO] = 1;
9062 if (TARGET_ALTIVEC_ABI)
9064 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9065 call_used_regs[i] = call_really_used_regs[i] = 1;
9067 /* AIX reserves VR20:31 in non-extended ABI mode. */
9069 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9070 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9075 /* Output insns to set DEST equal to the constant SOURCE as a series of
9076 lis, ori and shl instructions and return TRUE. */
9079 rs6000_emit_set_const (rtx dest, rtx source)
9081 machine_mode mode = GET_MODE (dest);
9086 gcc_checking_assert (CONST_INT_P (source));
9087 c = INTVAL (source);
9092 emit_insn (gen_rtx_SET (dest, source));
9096 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9098 emit_insn (gen_rtx_SET (copy_rtx (temp),
9099 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9100 emit_insn (gen_rtx_SET (dest,
9101 gen_rtx_IOR (SImode, copy_rtx (temp),
9102 GEN_INT (c & 0xffff))));
9106 if (!TARGET_POWERPC64)
9110 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9112 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9114 emit_move_insn (hi, GEN_INT (c >> 32));
9115 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9116 emit_move_insn (lo, GEN_INT (c));
9119 rs6000_emit_set_long_const (dest, c);
9126 insn = get_last_insn ();
9127 set = single_set (insn);
9128 if (! CONSTANT_P (SET_SRC (set)))
9129 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
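/* Example of the SImode path above: c = 0x12345678 becomes the pair

       lis  r9, 0x1234          # r9 = c & ~0xffff
       ori  r9, r9, 0x5678      # IOR in c & 0xffff

   which is exactly the temp/IOR sequence emitted above.  */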
9134 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9135 Output insns to set DEST equal to the constant C as a series of
9136 lis, ori and shl instructions. */
9139 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9142 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9152 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9153 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9154 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9156 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9157 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9159 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9161 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9162 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9164 emit_move_insn (dest,
9165 gen_rtx_IOR (DImode, copy_rtx (temp),
9168 else if (ud3 == 0 && ud4 == 0)
9170 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9172 gcc_assert (ud2 & 0x8000);
9173 emit_move_insn (copy_rtx (temp),
9174 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9176 emit_move_insn (copy_rtx (temp),
9177 gen_rtx_IOR (DImode, copy_rtx (temp),
9179 emit_move_insn (dest,
9180 gen_rtx_ZERO_EXTEND (DImode,
9181 gen_lowpart (SImode,
9184 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9185 || (ud4 == 0 && ! (ud3 & 0x8000)))
9187 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9189 emit_move_insn (copy_rtx (temp),
9190 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9192 emit_move_insn (copy_rtx (temp),
9193 gen_rtx_IOR (DImode, copy_rtx (temp),
9195 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9196 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9199 emit_move_insn (dest,
9200 gen_rtx_IOR (DImode, copy_rtx (temp),
9205 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9207 emit_move_insn (copy_rtx (temp),
9208 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9210 emit_move_insn (copy_rtx (temp),
9211 gen_rtx_IOR (DImode, copy_rtx (temp),
9214 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9215 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9218 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9219 gen_rtx_IOR (DImode, copy_rtx (temp),
9220 GEN_INT (ud2 << 16)));
9222 emit_move_insn (dest,
9223 gen_rtx_IOR (DImode, copy_rtx (temp),
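/* Worked example of the final case above: c = 0x123456789abcdef0
   requires the full five-instruction sequence

       lis   r9, 0x1234         # ud4
       ori   r9, r9, 0x5678     # IOR in ud3
       sldi  r9, r9, 32         # shift into the high doubleword
       oris  r9, r9, 0x9abc     # IOR in ud2 << 16
       ori   r9, r9, 0xdef0     # IOR in ud1                          */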
9228 /* Helper for the following. Get rid of [r+r] memory refs
9229 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9232 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9234 if (MEM_P (operands[0])
9235 && !REG_P (XEXP (operands[0], 0))
9236 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9237 GET_MODE (operands[0]), false))
9239 = replace_equiv_address (operands[0],
9240 copy_addr_to_reg (XEXP (operands[0], 0)));
9242 if (MEM_P (operands[1])
9243 && !REG_P (XEXP (operands[1], 0))
9244 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9245 GET_MODE (operands[1]), false))
9247 = replace_equiv_address (operands[1],
9248 copy_addr_to_reg (XEXP (operands[1], 0)));
9251 /* Generate a vector of constants to permute MODE for a little-endian
9252 storage operation by swapping the two halves of a vector. */
9254 rs6000_const_vec (machine_mode mode)
9282 v = rtvec_alloc (subparts);
9284 for (i = 0; i < subparts / 2; ++i)
9285 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9286 for (i = subparts / 2; i < subparts; ++i)
9287 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
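/* E.g. V4SImode (four subparts) yields the selector {2, 3, 0, 1} and
   V2DImode yields {1, 0}: the two halves of the vector swap places,
   mirroring the doubleword swap that lxvd2x/stxvd2x perform on a
   little-endian target.  */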
9292 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9295 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9297 /* Scalar permutations are easier to express in integer modes rather than
9298 floating-point modes, so cast them here. We use V1TImode instead
9299 of TImode to ensure that the values don't go through GPRs. */
9300 if (FLOAT128_VECTOR_P (mode))
9302 dest = gen_lowpart (V1TImode, dest);
9303 source = gen_lowpart (V1TImode, source);
9307 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9309 if (mode == TImode || mode == V1TImode)
9310 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9314 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9315 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9319 /* Emit a little-endian load from vector memory location SOURCE to VSX
9320 register DEST in mode MODE. The load is done with two permuting
9321 insns that represent an lxvd2x and xxpermdi.
9323 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9325 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9327 if (mode == TImode || mode == V1TImode)
9330 dest = gen_lowpart (V2DImode, dest);
9331 source = adjust_address (source, V2DImode, 0);
9334 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9335 rs6000_emit_le_vsx_permute (tmp, source, mode);
9336 rs6000_emit_le_vsx_permute (dest, tmp, mode);
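/* So, as a sketch, a little-endian V4SImode load ends up as

       lxvd2x   vs0, 0, r9           # doubleword-swapped load
       xxpermdi vs0, vs0, vs0, 2     # swap the halves back

   leaving the register elements in proper array order.  */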
9339 /* Emit a little-endian store to vector memory location DEST from VSX
9340 register SOURCE in mode MODE. The store is done with two permuting
9341 insns that represent an xxpermdi and an stxvd2x.
9343 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9345 /* This should never be called during or after LRA, because it does
9346 not re-permute the source register. It is intended only for use
9348 gcc_assert (!lra_in_progress && !reload_completed);
9350 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9352 if (mode == TImode || mode == V1TImode)
9355 dest = adjust_address (dest, V2DImode, 0);
9356 source = gen_lowpart (V2DImode, source);
9359 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9360 rs6000_emit_le_vsx_permute (tmp, source, mode);
9361 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9364 /* Emit a sequence representing a little-endian VSX load or store,
9365 moving data from SOURCE to DEST in mode MODE. This is done
9366 separately from rs6000_emit_move to ensure it is called only
9367 during expand. LE VSX loads and stores introduced later are
9368 handled with a split. The expand-time RTL generation allows
9369 us to optimize away redundant pairs of register-permutes. */
9371 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9373 gcc_assert (!BYTES_BIG_ENDIAN
9374 && VECTOR_MEM_VSX_P (mode)
9375 && !TARGET_P9_VECTOR
9376 && !gpr_or_gpr_p (dest, source)
9377 && (MEM_P (source) ^ MEM_P (dest)));
9381 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9382 rs6000_emit_le_vsx_load (dest, source, mode);
9386 if (!REG_P (source))
9387 source = force_reg (mode, source);
9388 rs6000_emit_le_vsx_store (dest, source, mode);
9392 /* Return whether an SFmode or SImode move can be done without converting one
9393 mode to another. This arises when we have:
9395 (SUBREG:SF (REG:SI ...))
9396 (SUBREG:SI (REG:SF ...))
9398 and one of the values is in a floating point/vector register, where SFmode
9399 scalars are stored in DFmode format. */
9402 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9404 if (TARGET_ALLOW_SF_SUBREG)
9407 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9410 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9413 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9414 if (SUBREG_P (dest))
9416 rtx dest_subreg = SUBREG_REG (dest);
9417 rtx src_subreg = SUBREG_REG (src);
9418 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9425 /* Helper function to change moves with:
9427 (SUBREG:SF (REG:SI)) and
9428 (SUBREG:SI (REG:SF))
9430 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9431 values are stored as DFmode values in the VSX registers. We need to convert
9432 the bits before we can use a direct move or operate on the bits in the
9433 vector register as an integer type.
9435 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
9438 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9440 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9441 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9442 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9444 rtx inner_source = SUBREG_REG (source);
9445 machine_mode inner_mode = GET_MODE (inner_source);
9447 if (mode == SImode && inner_mode == SFmode)
9449 emit_insn (gen_movsi_from_sf (dest, inner_source));
9453 if (mode == SFmode && inner_mode == SImode)
9455 emit_insn (gen_movsf_from_si (dest, inner_source));
9463 /* Emit a move from SOURCE to DEST in mode MODE. */
9465 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9469 operands[1] = source;
9471 if (TARGET_DEBUG_ADDR)
9474 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9475 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9476 GET_MODE_NAME (mode),
9479 can_create_pseudo_p ());
9481 fprintf (stderr, "source:\n");
9485 /* Check that we get CONST_WIDE_INT only when we should. */
9486 if (CONST_WIDE_INT_P (operands[1])
9487 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9490 #ifdef HAVE_AS_GNU_ATTRIBUTE
9491 /* If we use a long double type, set the flags in .gnu_attribute that say
9492 what the long double type is. This is to allow the linker's warning
9493 message for the wrong long double to be useful, even if the function does
9494 not do a call (for example, doing a 128-bit add on power9 if the long
9495 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128 are
9496 used when they aren't the default long double type. */
9497 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9499 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9500 rs6000_passes_float = rs6000_passes_long_double = true;
9502 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9503 rs6000_passes_float = rs6000_passes_long_double = true;
9507 /* See if we need to special case SImode/SFmode SUBREG moves. */
9508 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9509 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9512 /* Check if GCC is setting up a block move that will end up using FP
9513 registers as temporaries. We must make sure this is acceptable. */
9514 if (MEM_P (operands[0])
9515 && MEM_P (operands[1])
9517 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9518 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9519 && ! (rs6000_slow_unaligned_access (SImode,
9520 (MEM_ALIGN (operands[0]) > 32
9521 ? 32 : MEM_ALIGN (operands[0])))
9522 || rs6000_slow_unaligned_access (SImode,
9523 (MEM_ALIGN (operands[1]) > 32
9524 ? 32 : MEM_ALIGN (operands[1]))))
9525 && ! MEM_VOLATILE_P (operands [0])
9526 && ! MEM_VOLATILE_P (operands [1]))
9528 emit_move_insn (adjust_address (operands[0], SImode, 0),
9529 adjust_address (operands[1], SImode, 0));
9530 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9531 adjust_address (copy_rtx (operands[1]), SImode, 4));
9535 if (can_create_pseudo_p () && MEM_P (operands[0])
9536 && !gpc_reg_operand (operands[1], mode))
9537 operands[1] = force_reg (mode, operands[1]);
9539 /* Recognize the case where operand[1] is a reference to thread-local
9540 data and load its address to a register. */
9541 if (tls_referenced_p (operands[1]))
9543 enum tls_model model;
9544 rtx tmp = operands[1];
9547 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9549 addend = XEXP (XEXP (tmp, 0), 1);
9550 tmp = XEXP (XEXP (tmp, 0), 0);
9553 gcc_assert (SYMBOL_REF_P (tmp));
9554 model = SYMBOL_REF_TLS_MODEL (tmp);
9555 gcc_assert (model != 0);
9557 tmp = rs6000_legitimize_tls_address (tmp, model);
9560 tmp = gen_rtx_PLUS (mode, tmp, addend);
9561 tmp = force_operand (tmp, operands[0]);
9566 /* 128-bit constant floating-point values on Darwin should really be loaded
9567 as two parts. However, this premature splitting is a problem when DFmode
9568 values can go into Altivec registers. */
9569 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9570 && !reg_addr[DFmode].scalar_in_vmx_p)
9572 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9573 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9575 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9576 GET_MODE_SIZE (DFmode)),
9577 simplify_gen_subreg (DFmode, operands[1], mode,
9578 GET_MODE_SIZE (DFmode)),
9583 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9584 p1:SD) if p1 is not of floating point class and p0 is spilled as
9585 we can have no analogous movsd_store for this. */
9586 if (lra_in_progress && mode == DDmode
9587 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9588 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9589 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9590 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9593 int regno = REGNO (SUBREG_REG (operands[1]));
9595 if (!HARD_REGISTER_NUM_P (regno))
9597 cl = reg_preferred_class (regno);
9598 regno = reg_renumber[regno];
9600 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9602 if (regno >= 0 && ! FP_REGNO_P (regno))
9605 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9606 operands[1] = SUBREG_REG (operands[1]);
9611 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9612 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9613 && (REG_P (operands[1])
9614 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9616 int regno = reg_or_subregno (operands[1]);
9619 if (!HARD_REGISTER_NUM_P (regno))
9621 cl = reg_preferred_class (regno);
9622 gcc_assert (cl != NO_REGS);
9623 regno = reg_renumber[regno];
9625 regno = ira_class_hard_regs[cl][0];
9627 if (FP_REGNO_P (regno))
9629 if (GET_MODE (operands[0]) != DDmode)
9630 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9631 emit_insn (gen_movsd_store (operands[0], operands[1]));
9633 else if (INT_REGNO_P (regno))
9634 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9639 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9640 p:DD)) if p0 is not of floating point class and p1 is spilled as
9641 we can have no analogous movsd_load for this. */
9642 if (lra_in_progress && mode == DDmode
9643 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9644 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9645 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9646 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9649 int regno = REGNO (SUBREG_REG (operands[0]));
9651 if (!HARD_REGISTER_NUM_P (regno))
9653 cl = reg_preferred_class (regno);
9654 regno = reg_renumber[regno];
9656 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9658 if (regno >= 0 && ! FP_REGNO_P (regno))
9661 operands[0] = SUBREG_REG (operands[0]);
9662 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9667 && (REG_P (operands[0])
9668 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9669 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9670 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9672 int regno = reg_or_subregno (operands[0]);
9675 if (!HARD_REGISTER_NUM_P (regno))
9677 cl = reg_preferred_class (regno);
9678 gcc_assert (cl != NO_REGS);
9679 regno = reg_renumber[regno];
9681 regno = ira_class_hard_regs[cl][0];
9683 if (FP_REGNO_P (regno))
9685 if (GET_MODE (operands[1]) != DDmode)
9686 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9687 emit_insn (gen_movsd_load (operands[0], operands[1]));
9689 else if (INT_REGNO_P (regno))
9690 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9696 /* FIXME: In the long term, this switch statement should go away
9697 and be replaced by a sequence of tests based on things like
9703 if (CONSTANT_P (operands[1])
9704 && !CONST_INT_P (operands[1]))
9705 operands[1] = force_const_mem (mode, operands[1]);
9712 if (FLOAT128_2REG_P (mode))
9713 rs6000_eliminate_indexed_memrefs (operands);
9720 if (CONSTANT_P (operands[1])
9721 && ! easy_fp_constant (operands[1], mode))
9722 operands[1] = force_const_mem (mode, operands[1]);
9732 if (CONSTANT_P (operands[1])
9733 && !easy_vector_constant (operands[1], mode))
9734 operands[1] = force_const_mem (mode, operands[1]);
9739 /* Use default pattern for address of ELF small data */
9742 && DEFAULT_ABI == ABI_V4
9743 && (SYMBOL_REF_P (operands[1])
9744 || GET_CODE (operands[1]) == CONST)
9745 && small_data_operand (operands[1], mode))
9747 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9751 if (DEFAULT_ABI == ABI_V4
9752 && mode == Pmode && mode == SImode
9753 && flag_pic == 1 && got_operand (operands[1], mode))
9755 emit_insn (gen_movsi_got (operands[0], operands[1]));
9759 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9763 && CONSTANT_P (operands[1])
9764 && GET_CODE (operands[1]) != HIGH
9765 && !CONST_INT_P (operands[1]))
9767 rtx target = (!can_create_pseudo_p ()
9769 : gen_reg_rtx (mode));
9771 /* If this is a function address on -mcall-aixdesc,
9772 convert it to the address of the descriptor. */
9773 if (DEFAULT_ABI == ABI_AIX
9774 && SYMBOL_REF_P (operands[1])
9775 && XSTR (operands[1], 0)[0] == '.')
9777 const char *name = XSTR (operands[1], 0);
9779 while (*name == '.')
9781 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9782 CONSTANT_POOL_ADDRESS_P (new_ref)
9783 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9784 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9785 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9786 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9787 operands[1] = new_ref;
9790 if (DEFAULT_ABI == ABI_DARWIN)
9793 if (MACHO_DYNAMIC_NO_PIC_P)
9795 /* Take care of any required data indirection. */
9796 operands[1] = rs6000_machopic_legitimize_pic_address (
9797 operands[1], mode, operands[0]);
9798 if (operands[0] != operands[1])
9799 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9803 emit_insn (gen_macho_high (target, operands[1]));
9804 emit_insn (gen_macho_low (operands[0], target, operands[1]));
9808 emit_insn (gen_elf_high (target, operands[1]));
9809 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9813 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9814 and we have put it in the TOC, we just need to make a TOC-relative
9817 && SYMBOL_REF_P (operands[1])
9818 && use_toc_relative_ref (operands[1], mode))
9819 operands[1] = create_TOC_reference (operands[1], operands[0]);
9820 else if (mode == Pmode
9821 && CONSTANT_P (operands[1])
9822 && GET_CODE (operands[1]) != HIGH
9823 && ((REG_P (operands[0])
9824 && FP_REGNO_P (REGNO (operands[0])))
9825 || !CONST_INT_P (operands[1])
9826 || (num_insns_constant (operands[1], mode)
9827 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9828 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
9829 && (TARGET_CMODEL == CMODEL_SMALL
9830 || can_create_pseudo_p ()
9831 || (REG_P (operands[0])
9832 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9836 /* Darwin uses a special PIC legitimizer. */
9837 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9840 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9842 if (operands[0] != operands[1])
9843 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9848 /* If we are to limit the number of things we put in the TOC and
9849 this is a symbol plus a constant we can add in one insn,
9850 just put the symbol in the TOC and add the constant. */
9851 if (GET_CODE (operands[1]) == CONST
9852 && TARGET_NO_SUM_IN_TOC
9853 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9854 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9855 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9856 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
9857 && ! side_effects_p (operands[0]))
9860 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9861 rtx other = XEXP (XEXP (operands[1], 0), 1);
9863 sym = force_reg (mode, sym);
9864 emit_insn (gen_add3_insn (operands[0], sym, other));
9868 operands[1] = force_const_mem (mode, operands[1]);
9871 && SYMBOL_REF_P (XEXP (operands[1], 0))
9872 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
9874 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9876 operands[1] = gen_const_mem (mode, tocref);
9877 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9883 if (!VECTOR_MEM_VSX_P (TImode))
9884 rs6000_eliminate_indexed_memrefs (operands);
9888 rs6000_eliminate_indexed_memrefs (operands);
9892 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9895 /* Above, we may have called force_const_mem which may have returned
9896 an invalid address. If we can, fix this up; otherwise, reload will
9897 have to deal with it. */
9898 if (MEM_P (operands[1]))
9899 operands[1] = validize_mem (operands[1]);
9901 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9904 /* Nonzero if we can use a floating-point register to pass this arg. */
9905 #define USE_FP_FOR_ARG_P(CUM,MODE) \
9906 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
9907 && (CUM)->fregno <= FP_ARG_MAX_REG \
9908 && TARGET_HARD_FLOAT)
9910 /* Nonzero if we can use an AltiVec register to pass this arg. */
9911 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
9912 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
9913 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
9914 && TARGET_ALTIVEC_ABI \
9917 /* Walk down the type tree of TYPE counting consecutive base elements.
9918 If *MODEP is VOIDmode, then set it to the first valid floating point
9919 or vector type. If a non-floating point or vector type is found, or
9920 if a floating point or vector type that doesn't match a non-VOIDmode
9921 *MODEP is found, then return -1, otherwise return the count in the sub-tree. */
9925 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
9930 switch (TREE_CODE (type))
9933 mode = TYPE_MODE (type);
9934 if (!SCALAR_FLOAT_MODE_P (mode))
9937 if (*modep == VOIDmode)
9946 mode = TYPE_MODE (TREE_TYPE (type));
9947 if (!SCALAR_FLOAT_MODE_P (mode))
9950 if (*modep == VOIDmode)
9959 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
9962 /* Use V4SImode as representative of all 128-bit vector types. */
9963 size = int_size_in_bytes (type);
9973 if (*modep == VOIDmode)
9976 /* Vector modes are considered to be opaque: two vectors are
9977 equivalent for the purposes of being homogeneous aggregates
9978 if they are the same size. */
9987 tree index = TYPE_DOMAIN (type);
9989 /* Can't handle incomplete types or sizes that are not
9991 if (!COMPLETE_TYPE_P (type)
9992 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9995 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
9998 || !TYPE_MAX_VALUE (index)
9999 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10000 || !TYPE_MIN_VALUE (index)
10001 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10005 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10006 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10008 /* There must be no padding. */
10009 if (wi::to_wide (TYPE_SIZE (type))
10010 != count * GET_MODE_BITSIZE (*modep))
10022 /* Can't handle incomplete types or sizes that are not
10024 if (!COMPLETE_TYPE_P (type)
10025 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10028 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10030 if (TREE_CODE (field) != FIELD_DECL)
10033 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10036 count += sub_count;
10039 /* There must be no padding. */
10040 if (wi::to_wide (TYPE_SIZE (type))
10041 != count * GET_MODE_BITSIZE (*modep))
10048 case QUAL_UNION_TYPE:
10050 /* These aren't very interesting except in a degenerate case. */
10055 /* Can't handle incomplete types or sizes that are not
10057 if (!COMPLETE_TYPE_P (type)
10058 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10061 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10063 if (TREE_CODE (field) != FIELD_DECL)
10066 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10069 count = count > sub_count ? count : sub_count;
10072 /* There must be no padding. */
10073 if (wi::to_wide (TYPE_SIZE (type))
10074 != count * GET_MODE_BITSIZE (*modep))
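/* Worked examples of the walk above (assuming the ELFv2 ABI and
   hypothetical type names):

     struct hfa { double x, y, z; };    -> count 3, *modep = DFmode
     struct hva { vector int a, b; };   -> count 2, *modep = V4SImode
     struct mix { double x; int i; };   -> -1 (not homogeneous)        */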
10087 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10088 float or vector aggregate that shall be passed in FP/vector registers
10089 according to the ELFv2 ABI, return the homogeneous element mode in
10090 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10092 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
10095 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10096 machine_mode *elt_mode,
10099 /* Note that we do not accept complex types at the top level as
10100 homogeneous aggregates; these types are handled via the
10101 targetm.calls.split_complex_arg mechanism. Complex types
10102 can be elements of homogeneous aggregates, however. */
10103 if (TARGET_HARD_FLOAT && DEFAULT_ABI == ABI_ELFv2 && type
10104 && AGGREGATE_TYPE_P (type))
10106 machine_mode field_mode = VOIDmode;
10107 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10109 if (field_count > 0)
10111 int reg_size = ALTIVEC_OR_VSX_VECTOR_MODE (field_mode) ? 16 : 8;
10112 int field_size = ROUND_UP (GET_MODE_SIZE (field_mode), reg_size);
10114 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10115 up to AGGR_ARG_NUM_REG registers. */
10116 if (field_count * field_size <= AGGR_ARG_NUM_REG * reg_size)
10119 *elt_mode = field_mode;
10121 *n_elts = field_count;
10134 /* Return a nonzero value to say to return the function value in
10135 memory, just as large structures are always returned. TYPE will be
10136 the data type of the value, and FNTYPE will be the type of the
10137 function doing the returning, or @code{NULL} for libcalls.
10139 The AIX ABI for the RS/6000 specifies that all structures are
10140 returned in memory. The Darwin ABI does the same.
10142 For the Darwin 64 Bit ABI, a function result can be returned in
10143 registers or in memory, depending on the size of the return data
10144 type. If it is returned in registers, the value occupies the same
10145 registers as it would if it were the first and only function
10146 argument. Otherwise, the function places its result in memory at
10147 the location pointed to by GPR3.
10149 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10150 but a draft put them in memory, and GCC used to implement the draft
10151 instead of the final standard. Therefore, aix_struct_return
10152 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10153 compatibility can change DRAFT_V4_STRUCT_RET to override the
10154 default, and -m switches get the final word. See
10155 rs6000_option_override_internal for more details.
10157 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10158 long double support is enabled. These values are returned in memory.
10160 int_size_in_bytes returns -1 for variable size objects, which go in
10161 memory always. The cast to unsigned makes -1 > 8. */
10164 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10166 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10168 && rs6000_darwin64_abi
10169 && TREE_CODE (type) == RECORD_TYPE
10170 && int_size_in_bytes (type) > 0)
10172 CUMULATIVE_ARGS valcum;
10176 valcum.fregno = FP_ARG_MIN_REG;
10177 valcum.vregno = ALTIVEC_ARG_MIN_REG;
10178 /* Do a trial code generation as if this were going to be passed
10179 as an argument; if any part goes in memory, we return NULL. */
10180 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
10183 /* Otherwise fall through to more conventional ABI rules. */
10186 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
10187 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
10191 /* The ELFv2 ABI returns aggregates up to 16B in registers */
10192 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
10193 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
10196 if (AGGREGATE_TYPE_P (type)
10197 && (aix_struct_return
10198 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
10201 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10202 modes only exist for GCC vector types if -maltivec. */
10203 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
10204 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10207 /* Return synthetic vectors in memory. */
10208 if (TREE_CODE (type) == VECTOR_TYPE
10209 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10211 static bool warned_for_return_big_vectors = false;
10212 if (!warned_for_return_big_vectors)
10214 warning (OPT_Wpsabi, "GCC vector returned by reference: "
10215 "non-standard ABI extension with no compatibility "
10217 warned_for_return_big_vectors = true;
10222 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10223 && FLOAT128_IEEE_P (TYPE_MODE (type)))
10229 /* Specify whether values returned in registers should be at the most
10230 significant end of a register. We want aggregates returned by
10231 value to match the way aggregates are passed to functions. */
10234 rs6000_return_in_msb (const_tree valtype)
10236 return (DEFAULT_ABI == ABI_ELFv2
10237 && BYTES_BIG_ENDIAN
10238 && AGGREGATE_TYPE_P (valtype)
10239 && (rs6000_function_arg_padding (TYPE_MODE (valtype), valtype)
10243 #ifdef HAVE_AS_GNU_ATTRIBUTE
10244 /* Return TRUE if a call to function FNDECL may be one that
10245 potentially affects the function calling ABI of the object file. */
10248 call_ABI_of_interest (tree fndecl)
10250 if (rs6000_gnu_attr && symtab->state == EXPANSION)
10252 struct cgraph_node *c_node;
10254 /* Libcalls are always interesting. */
10255 if (fndecl == NULL_TREE)
10258 /* Any call to an external function is interesting. */
10259 if (DECL_EXTERNAL (fndecl))
10262 /* Interesting functions that we are emitting in this object file. */
10263 c_node = cgraph_node::get (fndecl);
10264 c_node = c_node->ultimate_alias_target ();
10265 return !c_node->only_called_directly_p ();
10271 /* Initialize a variable CUM of type CUMULATIVE_ARGS
10272 for a call to a function whose data type is FNTYPE.
10273 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
10275 For incoming args we set the number of arguments in the prototype large
10276 so we never return a PARALLEL. */
10279 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
10280 rtx libname ATTRIBUTE_UNUSED, int incoming,
10281 int libcall, int n_named_args,
10283 machine_mode return_mode ATTRIBUTE_UNUSED)
10285 static CUMULATIVE_ARGS zero_cumulative;
10287 *cum = zero_cumulative;
10289 cum->fregno = FP_ARG_MIN_REG;
10290 cum->vregno = ALTIVEC_ARG_MIN_REG;
10291 cum->prototype = (fntype && prototype_p (fntype));
10292 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
10293 ? CALL_LIBCALL : CALL_NORMAL);
10294 cum->sysv_gregno = GP_ARG_MIN_REG;
10295 cum->stdarg = stdarg_p (fntype);
10296 cum->libcall = libcall;
10298 cum->nargs_prototype = 0;
10299 if (incoming || cum->prototype)
10300 cum->nargs_prototype = n_named_args;
10302 /* Check for a longcall attribute. */
10303 if ((!fntype && rs6000_default_long_calls)
10305 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
10306 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
10307 cum->call_cookie |= CALL_LONG;
10308 else if (DEFAULT_ABI != ABI_DARWIN)
10310 bool is_local = (fndecl
10311 && !DECL_EXTERNAL (fndecl)
10312 && !DECL_WEAK (fndecl)
10313 && (*targetm.binds_local_p) (fndecl));
10319 && lookup_attribute ("noplt", TYPE_ATTRIBUTES (fntype)))
10320 cum->call_cookie |= CALL_LONG;
10325 && lookup_attribute ("plt", TYPE_ATTRIBUTES (fntype))))
10326 cum->call_cookie |= CALL_LONG;
10330 if (TARGET_DEBUG_ARG)
10332 fprintf (stderr, "\ninit_cumulative_args:");
10335 tree ret_type = TREE_TYPE (fntype);
10336 fprintf (stderr, " ret code = %s,",
10337 get_tree_code_name (TREE_CODE (ret_type)));
10340 if (cum->call_cookie & CALL_LONG)
10341 fprintf (stderr, " longcall,");
10343 fprintf (stderr, " proto = %d, nargs = %d\n",
10344 cum->prototype, cum->nargs_prototype);
10347 #ifdef HAVE_AS_GNU_ATTRIBUTE
10348 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
10350 cum->escapes = call_ABI_of_interest (fndecl);
10357 return_type = TREE_TYPE (fntype);
10358 return_mode = TYPE_MODE (return_type);
10361 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
10363 if (return_type != NULL)
10365 if (TREE_CODE (return_type) == RECORD_TYPE
10366 && TYPE_TRANSPARENT_AGGR (return_type))
10368 return_type = TREE_TYPE (first_field (return_type));
10369 return_mode = TYPE_MODE (return_type);
10371 if (AGGREGATE_TYPE_P (return_type)
10372 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
10374 rs6000_returns_struct = true;
10376 if (SCALAR_FLOAT_MODE_P (return_mode))
10378 rs6000_passes_float = true;
10379 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
10380 && (FLOAT128_IBM_P (return_mode)
10381 || FLOAT128_IEEE_P (return_mode)
10382 || (return_type != NULL
10383 && (TYPE_MAIN_VARIANT (return_type)
10384 == long_double_type_node))))
10385 rs6000_passes_long_double = true;
10387 /* Note if we pass or return an IEEE 128-bit type. We changed
10388 the mangling for these types, and we may need to make an alias
10389 with the old mangling. */
10390 if (FLOAT128_IEEE_P (return_mode))
10391 rs6000_passes_ieee128 = true;
10393 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode))
10394 rs6000_passes_vector = true;
10401 && TARGET_ALTIVEC_ABI
10402 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
10404 error ("cannot return value in vector register because"
10405 " altivec instructions are disabled, use %qs"
10406 " to enable them", "-maltivec");
10410 /* The mode the ABI uses for a word. This is not the same as word_mode
10411 for -m32 -mpowerpc64. This is used to implement various target hooks. */
10413 static scalar_int_mode
10414 rs6000_abi_word_mode (void)
10416 return TARGET_32BIT ? SImode : DImode;
10419 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
10421 rs6000_offload_options (void)
10424 return xstrdup ("-foffload-abi=lp64");
10426 return xstrdup ("-foffload-abi=ilp32");
10429 /* On rs6000, function arguments are promoted, as are function return
10432 static machine_mode
10433 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10435 int *punsignedp ATTRIBUTE_UNUSED,
10438 PROMOTE_MODE (mode, *punsignedp, type);
10443 /* Return true if TYPE must be passed on the stack and not in registers. */
10446 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
10448 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
10449 return must_pass_in_stack_var_size (mode, type);
10451 return must_pass_in_stack_var_size_or_pad (mode, type);
10455 is_complex_IBM_long_double (machine_mode mode)
10457 return mode == ICmode || (mode == TCmode && FLOAT128_IBM_P (TCmode));
10460 /* Whether ABI_V4 passes MODE args to a function in floating point
10464 abi_v4_pass_in_fpr (machine_mode mode, bool named)
10466 if (!TARGET_HARD_FLOAT)
10468 if (mode == DFmode)
10470 if (mode == SFmode && named)
10472 /* ABI_V4 passes complex IBM long double in 8 gprs.
10473 Stupid, but we can't change the ABI now. */
10474 if (is_complex_IBM_long_double (mode))
10476 if (FLOAT128_2REG_P (mode))
10478 if (DECIMAL_FLOAT_MODE_P (mode))
10483 /* Implement TARGET_FUNCTION_ARG_PADDING.
10485 For the AIX ABI structs are always stored left shifted in their
10488 static pad_direction
10489 rs6000_function_arg_padding (machine_mode mode, const_tree type)
10491 #ifndef AGGREGATE_PADDING_FIXED
10492 #define AGGREGATE_PADDING_FIXED 0
10494 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
10495 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
10498 if (!AGGREGATE_PADDING_FIXED)
10500 /* GCC used to pass structures of the same size as integer types as
10501 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
10502 That is, structures of size 1 or 2 (or 4 when TARGET_64BIT) were
10503 passed padded downward, except that -mstrict-align further
10504 muddied the water in that multi-component structures of 2 and 4
10505 bytes in size were passed padded upward.
10507 The following arranges for best compatibility with previous
10508 versions of gcc, but removes the -mstrict-align dependency. */
10509 if (BYTES_BIG_ENDIAN)
10511 HOST_WIDE_INT size = 0;
10513 if (mode == BLKmode)
10515 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
10516 size = int_size_in_bytes (type);
10519 size = GET_MODE_SIZE (mode);
10521 if (size == 1 || size == 2 || size == 4)
10522 return PAD_DOWNWARD;
10527 if (AGGREGATES_PAD_UPWARD_ALWAYS)
10529 if (type != 0 && AGGREGATE_TYPE_P (type))
10533 /* Fall back to the default. */
10534 return default_function_arg_padding (mode, type);
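/* Illustration of the net effect on a big-endian target: a 2-byte
   struct such as struct { char a, b; } is padded downward (it travels
   in the least significant bytes of its word, as a short would),
   while an aggregate of a full word or more falls through to the
   default rule and is padded upward.  Example types are illustrative.  */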
10537 /* If defined, a C expression that gives the alignment boundary, in bits,
10538 of an argument with the specified mode and type. If it is not defined,
10539 PARM_BOUNDARY is used for all arguments.
10541 V.4 wants long longs and doubles to be double word aligned. Just
10542 testing the mode size is a boneheaded way to do this as it means
10543 that other types such as complex int are also double word aligned.
10544 However, we're stuck with this because changing the ABI might break
10545 existing library interfaces.
10547 Quadword align Altivec/VSX vectors.
10548 Quadword align large synthetic vector types. */
10550 static unsigned int
10551 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
10553 machine_mode elt_mode;
10556 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10558 if (DEFAULT_ABI == ABI_V4
10559 && (GET_MODE_SIZE (mode) == 8
10560 || (TARGET_HARD_FLOAT
10561 && !is_complex_IBM_long_double (mode)
10562 && FLOAT128_2REG_P (mode))))
10564 else if (FLOAT128_VECTOR_P (mode))
10566 else if (type && TREE_CODE (type) == VECTOR_TYPE
10567 && int_size_in_bytes (type) >= 8
10568 && int_size_in_bytes (type) < 16)
10570 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10571 || (type && TREE_CODE (type) == VECTOR_TYPE
10572 && int_size_in_bytes (type) >= 16))
10575 /* Aggregate types that need > 8 byte alignment are quadword-aligned
10576 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
10577 -mcompat-align-parm is used. */
10578 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
10579 || DEFAULT_ABI == ABI_ELFv2)
10580 && type && TYPE_ALIGN (type) > 64)
10582 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
10583 or homogeneous float/vector aggregates here. We already handled
10584 vector aggregates above, but still need to check for float here. */
10585 bool aggregate_p = (AGGREGATE_TYPE_P (type)
10586 && !SCALAR_FLOAT_MODE_P (elt_mode));
10588 /* We used to check for BLKmode instead of the above aggregate type
10589 check. Warn when this results in any difference to the ABI. */
10590 if (aggregate_p != (mode == BLKmode))
10592 static bool warned;
10593 if (!warned && warn_psabi)
10596 inform (input_location,
10597 "the ABI of passing aggregates with %d-byte alignment"
10598 " has changed in GCC 5",
10599 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
10607 /* Similar for the Darwin64 ABI. Note that for historical reasons we
10608 implement the "aggregate type" check as a BLKmode check here; this
10609 means certain aggregate types are in fact not aligned. */
10610 if (TARGET_MACHO && rs6000_darwin64_abi
10612 && type && TYPE_ALIGN (type) > 64)
10615 return PARM_BOUNDARY;
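/* Illustrative results of the above (assuming the usual ABI defaults):
   under ABI_V4 a double is aligned to 64 bits even though PARM_BOUNDARY
   is 32; an Altivec vector such as V4SImode gets 128 bits on any ABI;
   a plain int simply gets PARM_BOUNDARY.  */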
10618 /* The offset in words to the start of the parameter save area. */
10620 static unsigned int
10621 rs6000_parm_offset (void)
10623 return (DEFAULT_ABI == ABI_V4 ? 2
10624 : DEFAULT_ABI == ABI_ELFv2 ? 4
10628 /* For a function parm of MODE and TYPE, return the starting word in
10629 the parameter area. NWORDS of the parameter area are already used. */
10631 static unsigned int
10632 rs6000_parm_start (machine_mode mode, const_tree type,
10633 unsigned int nwords)
10635 unsigned int align;
10637 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
10638 return nwords + (-(rs6000_parm_offset () + nwords) & align);
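/* Worked example of the computation above (values illustrative): for a
   16-byte-aligned argument on 64-bit ELFv2, rs6000_function_arg_boundary
   returns 128 and PARM_BOUNDARY is 64, so ALIGN is 1.  With
   rs6000_parm_offset () == 4 and NWORDS == 3, -(4 + 3) & 1 == 1, so the
   argument starts at word 3 + 1 == 4; offset plus start is then even,
   i.e. 16-byte aligned.  */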
10641 /* Compute the size (in words) of a function argument. */
10643 static unsigned long
10644 rs6000_arg_size (machine_mode mode, const_tree type)
10646 unsigned long size;
10648 if (mode != BLKmode)
10649 size = GET_MODE_SIZE (mode);
10651 size = int_size_in_bytes (type);
10654 return (size + 3) >> 2;
10656 return (size + 7) >> 3;
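/* E.g. a 10-byte BLKmode struct occupies (10 + 3) >> 2 == 3 words in
   32-bit mode and (10 + 7) >> 3 == 2 doublewords in 64-bit mode; the
   shifts just round the byte size up to whole registers.  */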
10659 /* Use this to flush pending int fields. */
10662 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
10663 HOST_WIDE_INT bitpos, int final)
10665 unsigned int startbit, endbit;
10666 int intregs, intoffset;
10668 /* Handle the situations where a float is taking up the first half
10669 of the GPR, and the other half is empty (typically due to
10670 alignment restrictions). We can detect this by an 8-byte-aligned
10671 int field, or by seeing that this is the final flush for this
10672 argument. Count the word and continue on. */
10673 if (cum->floats_in_gpr == 1
10674 && (cum->intoffset % 64 == 0
10675 || (cum->intoffset == -1 && final)))
10678 cum->floats_in_gpr = 0;
10681 if (cum->intoffset == -1)
10684 intoffset = cum->intoffset;
10685 cum->intoffset = -1;
10686 cum->floats_in_gpr = 0;
10688 if (intoffset % BITS_PER_WORD != 0)
10690 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
10691 if (!int_mode_for_size (bits, 0).exists ())
10693 /* We couldn't find an appropriate mode, which happens,
10694 e.g., in packed structs when there are 3 bytes to load.
10695 Move intoffset back to the beginning of the word in this
10697 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
10701 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10702 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10703 intregs = (endbit - startbit) / BITS_PER_WORD;
10704 cum->words += intregs;
10705 /* words should be unsigned. */
10706 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
10708 int pad = (endbit/BITS_PER_WORD) - cum->words;
10713 /* The darwin64 ABI calls for us to recurse down through structs,
10714 looking for elements passed in registers. Unfortunately, we have
10715 to track int register count here also because of misalignments
10716 in powerpc alignment mode. */
10719 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
10721 HOST_WIDE_INT startbitpos)
10725 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10726 if (TREE_CODE (f) == FIELD_DECL)
10728 HOST_WIDE_INT bitpos = startbitpos;
10729 tree ftype = TREE_TYPE (f);
10731 if (ftype == error_mark_node)
10733 mode = TYPE_MODE (ftype);
10735 if (DECL_SIZE (f) != 0
10736 && tree_fits_uhwi_p (bit_position (f)))
10737 bitpos += int_bit_position (f);
10739 /* ??? FIXME: else assume zero offset. */
10741 if (TREE_CODE (ftype) == RECORD_TYPE)
10742 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
10743 else if (USE_FP_FOR_ARG_P (cum, mode))
10745 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
10746 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10747 cum->fregno += n_fpregs;
10748 /* Single-precision floats present a special problem for
10749 us, because they are smaller than an 8-byte GPR, and so
10750 the structure-packing rules combined with the standard
10751 varargs behavior mean that we want to pack float/float
10752 and float/int combinations into a single register's
10753 space. This is complicated by the arg advance flushing,
10754 which works on arbitrarily large groups of int-type
10756 if (mode == SFmode)
10758 if (cum->floats_in_gpr == 1)
10760 /* Two floats in a word; count the word and reset
10761 the float count. */
10763 cum->floats_in_gpr = 0;
10765 else if (bitpos % 64 == 0)
10767 /* A float at the beginning of an 8-byte word;
10768 count it and put off adjusting cum->words until
10769 we see if an arg advance flush is going to do it
10771 cum->floats_in_gpr++;
10775 /* The float is at the end of a word, preceded
10776 by integer fields, so the arg advance flush
10777 just above has already set cum->words and
10778 everything is taken care of. */
10782 cum->words += n_fpregs;
10784 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10786 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10790 else if (cum->intoffset == -1)
10791 cum->intoffset = bitpos;
10795 /* Check for an item that needs to be considered specially under the darwin 64
10796 bit ABI. These are record types where the mode is BLK or the structure is
10797 8 bytes in size. */
10799 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
10801 return rs6000_darwin64_abi
10802 && ((mode == BLKmode
10803 && TREE_CODE (type) == RECORD_TYPE
10804 && int_size_in_bytes (type) > 0)
10805 || (type && TREE_CODE (type) == RECORD_TYPE
10806 && int_size_in_bytes (type) == 8)) ? 1 : 0;
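/* For example, struct { int a, b; } (8 bytes, typically not BLKmode)
   is handled specially here because of its size, while a 1-byte record
   in QImode is not.  The example types are illustrative.  */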
10809 /* Update the data in CUM to advance over an argument
10810 of mode MODE and data type TYPE.
10811 (TYPE is null for libcalls where that information may not be available.)
10813 Note that for args passed by reference, function_arg will be called
10814 with MODE and TYPE set to that of the pointer to the arg, not the arg
10818 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
10819 const_tree type, bool named, int depth)
10821 machine_mode elt_mode;
10824 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10826 /* Only tick off an argument if we're not recursing. */
10828 cum->nargs_prototype--;
10830 #ifdef HAVE_AS_GNU_ATTRIBUTE
10831 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
10834 if (SCALAR_FLOAT_MODE_P (mode))
10836 rs6000_passes_float = true;
10837 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
10838 && (FLOAT128_IBM_P (mode)
10839 || FLOAT128_IEEE_P (mode)
10841 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
10842 rs6000_passes_long_double = true;
10844 /* Note if we pass or return an IEEE 128-bit type. We changed the
10845 mangling for these types, and we may need to make an alias with
10846 the old mangling. */
10847 if (FLOAT128_IEEE_P (mode))
10848 rs6000_passes_ieee128 = true;
10850 if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
10851 rs6000_passes_vector = true;
10855 if (TARGET_ALTIVEC_ABI
10856 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10857 || (type && TREE_CODE (type) == VECTOR_TYPE
10858 && int_size_in_bytes (type) == 16)))
10860 bool stack = false;
10862 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10864 cum->vregno += n_elts;
10866 if (!TARGET_ALTIVEC)
10867 error ("cannot pass argument in vector register because"
10868 " altivec instructions are disabled, use %qs"
10869 " to enable them", "-maltivec");
10871 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
10872 even if it is going to be passed in a vector register.
10873 Darwin does the same for variable-argument functions. */
10874 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10876 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
10886 /* Vector parameters must be 16-byte aligned. In 32-bit
10887 mode this means we need to take into account the offset
10888 to the parameter save area. In 64-bit mode, they just
10889 have to start on an even word, since the parameter save
10890 area is 16-byte aligned. */
10892 align = -(rs6000_parm_offset () + cum->words) & 3;
10894 align = cum->words & 1;
10895 cum->words += align + rs6000_arg_size (mode, type);
10897 if (TARGET_DEBUG_ARG)
10899 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
10900 cum->words, align);
10901 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
10902 cum->nargs_prototype, cum->prototype,
10903 GET_MODE_NAME (mode));
10907 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10909 int size = int_size_in_bytes (type);
10910 /* Variable sized types have size == -1 and are
10911 treated as if consisting entirely of ints.
10912 Pad to 16 byte boundary if needed. */
10913 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10914 && (cum->words % 2) != 0)
10916 /* For varargs, we can just go up by the size of the struct. */
10918 cum->words += (size + 7) / 8;
10921 /* It is tempting to say int register count just goes up by
10922 sizeof(type)/8, but this is wrong in a case such as
10923 { int; double; int; } [powerpc alignment]. We have to
10924 grovel through the fields for these too. */
10925 cum->intoffset = 0;
10926 cum->floats_in_gpr = 0;
10927 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
10928 rs6000_darwin64_record_arg_advance_flush (cum,
10929 size * BITS_PER_UNIT, 1);
10931 if (TARGET_DEBUG_ARG)
10933 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
10934 cum->words, TYPE_ALIGN (type), size);
10936 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
10937 cum->nargs_prototype, cum->prototype,
10938 GET_MODE_NAME (mode));
10941 else if (DEFAULT_ABI == ABI_V4)
10943 if (abi_v4_pass_in_fpr (mode, named))
10945 /* _Decimal128 must use an even/odd register pair. This assumes
10946 that the register number is odd when fregno is odd. */
10947 if (mode == TDmode && (cum->fregno % 2) == 1)
10950 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
10951 <= FP_ARG_V4_MAX_REG)
10952 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
10955 cum->fregno = FP_ARG_V4_MAX_REG + 1;
10956 if (mode == DFmode || FLOAT128_IBM_P (mode)
10957 || mode == DDmode || mode == TDmode)
10958 cum->words += cum->words & 1;
10959 cum->words += rs6000_arg_size (mode, type);
10964 int n_words = rs6000_arg_size (mode, type);
10965 int gregno = cum->sysv_gregno;
10967 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
10968 So is any other 2 word item such as complex int, due to a
10969 historical mistake. */
10971 gregno += (1 - gregno) & 1;
10973 /* Multi-reg args are not split between registers and stack. */
10974 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10976 /* Long long is aligned on the stack. So are other 2 word
10977 items such as complex int due to a historical mistake. */
10979 cum->words += cum->words & 1;
10980 cum->words += n_words;
10983 /* Note: we continue to accumulate gregno even after we have started
10984 spilling to the stack; this tells expand_builtin_saveregs that
10985 spilling to the stack has begun. */
10986 cum->sysv_gregno = gregno + n_words;
10989 if (TARGET_DEBUG_ARG)
10991 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10992 cum->words, cum->fregno);
10993 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
10994 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
10995 fprintf (stderr, "mode = %4s, named = %d\n",
10996 GET_MODE_NAME (mode), named);
11001 int n_words = rs6000_arg_size (mode, type);
11002 int start_words = cum->words;
11003 int align_words = rs6000_parm_start (mode, type, start_words);
11005 cum->words = align_words + n_words;
11007 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
11009 /* _Decimal128 must be passed in an even/odd float register pair.
11010 This assumes that the register number is odd when fregno is
11012 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11014 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11017 if (TARGET_DEBUG_ARG)
11019 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11020 cum->words, cum->fregno);
11021 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11022 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11023 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11024 named, align_words - start_words, depth);
11030 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11031 const_tree type, bool named)
11033 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11037 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11038 structure between cum->intoffset and bitpos to integer registers. */
11041 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11042 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11045 unsigned int regno;
11046 unsigned int startbit, endbit;
11047 int this_regno, intregs, intoffset;
11050 if (cum->intoffset == -1)
11053 intoffset = cum->intoffset;
11054 cum->intoffset = -1;
11056 /* If this is the trailing part of a word, try to only load that
11057 much into the register. Otherwise load the whole register. Note
11058 that in the latter case we may pick up unwanted bits. It's not a
11059 problem at the moment, but we may wish to revisit this. */
11061 if (intoffset % BITS_PER_WORD != 0)
11063 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11064 if (!int_mode_for_size (bits, 0).exists (&mode))
11066 /* We couldn't find an appropriate mode, which happens,
11067 e.g., in packed structs when there are 3 bytes to load.
11068 Move intoffset back to the beginning of the word in this
11070 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11077 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11078 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11079 intregs = (endbit - startbit) / BITS_PER_WORD;
11080 this_regno = cum->words + intoffset / BITS_PER_WORD;
11082 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11083 cum->use_stack = 1;
11085 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11089 intoffset /= BITS_PER_UNIT;
11092 regno = GP_ARG_MIN_REG + this_regno;
11093 reg = gen_rtx_REG (mode, regno);
11095 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11098 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11102 while (intregs > 0);
11105 /* Recursive workhorse for the following. */
11108 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
11109 HOST_WIDE_INT startbitpos, rtx rvec[],
11114 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11115 if (TREE_CODE (f) == FIELD_DECL)
11117 HOST_WIDE_INT bitpos = startbitpos;
11118 tree ftype = TREE_TYPE (f);
11120 if (ftype == error_mark_node)
11122 mode = TYPE_MODE (ftype);
11124 if (DECL_SIZE (f) != 0
11125 && tree_fits_uhwi_p (bit_position (f)))
11126 bitpos += int_bit_position (f);
11128 /* ??? FIXME: else assume zero offset. */
11130 if (TREE_CODE (ftype) == RECORD_TYPE)
11131 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
11132 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
11134 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
11138 case E_SCmode: mode = SFmode; break;
11139 case E_DCmode: mode = DFmode; break;
11140 case E_TCmode: mode = TFmode; break;
11144 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11145 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
11147 gcc_assert (cum->fregno == FP_ARG_MAX_REG
11148 && (mode == TFmode || mode == TDmode));
11149 /* Long double or _Decimal128 split over regs and memory. */
11150 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
11154 = gen_rtx_EXPR_LIST (VOIDmode,
11155 gen_rtx_REG (mode, cum->fregno++),
11156 GEN_INT (bitpos / BITS_PER_UNIT));
11157 if (FLOAT128_2REG_P (mode))
11160 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11162 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11164 = gen_rtx_EXPR_LIST (VOIDmode,
11165 gen_rtx_REG (mode, cum->vregno++),
11166 GEN_INT (bitpos / BITS_PER_UNIT));
11168 else if (cum->intoffset == -1)
11169 cum->intoffset = bitpos;
11173 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
11174 the register(s) to be used for each field and subfield of a struct
11175 being passed by value, along with the offset of where the
11176 register's value may be found in the block. FP fields go in FP
11177 register, vector fields go in vector registers, and everything
11178 else goes in int registers, packed as in memory.
11180 This code is also used for function return values. RETVAL indicates
11181 whether this is the case.
11183 Much of this is taken from the SPARC V9 port, which has a similar
11184 calling convention. */
11187 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
11188 bool named, bool retval)
11190 rtx rvec[FIRST_PSEUDO_REGISTER];
11191 int k = 1, kbase = 1;
11192 HOST_WIDE_INT typesize = int_size_in_bytes (type);
11193 /* This is a copy; modifications are not visible to our caller. */
11194 CUMULATIVE_ARGS copy_cum = *orig_cum;
11195 CUMULATIVE_ARGS *cum = &copy_cum;
11197 /* Pad to 16 byte boundary if needed. */
11198 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11199 && (cum->words % 2) != 0)
11202 cum->intoffset = 0;
11203 cum->use_stack = 0;
11204 cum->named = named;
11206 /* Put entries into rvec[] for individual FP and vector fields, and
11207 for the chunks of memory that go in int regs. Note we start at
11208 element 1; 0 is reserved for an indication of using memory, and
11209 may or may not be filled in below. */
11210 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
11211 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
11213 /* If any part of the struct went on the stack put all of it there.
11214 This hack is because the generic code for
11215 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
11216 parts of the struct are not at the beginning. */
11217 if (cum->use_stack)
11220 return NULL_RTX; /* doesn't go in registers at all */
11222 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11224 if (k > 1 || cum->use_stack)
11225 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
11230 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
11233 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
11238 rtx rvec[GP_ARG_NUM_REG + 1];
11240 if (align_words >= GP_ARG_NUM_REG)
11243 n_units = rs6000_arg_size (mode, type);
11245 /* Optimize the simple case where the arg fits in one gpr, except in
11246 the case of BLKmode due to assign_parms assuming that registers are
11247 BITS_PER_WORD wide. */
11249 || (n_units == 1 && mode != BLKmode))
11250 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11253 if (align_words + n_units > GP_ARG_NUM_REG)
11254 /* Not all of the arg fits in gprs. Say that it goes in memory too,
11255 using a magic NULL_RTX component.
11256 This is not strictly correct. Only some of the arg belongs in
11257 memory, not all of it. However, the normal scheme using
11258 function_arg_partial_nregs can result in unusual subregs, eg.
11259 (subreg:SI (reg:DF) 4), which are not handled well. The code to
11260 store the whole arg to memory is often more efficient than code
11261 to store pieces, and we know that space is available in the right
11262 place for the whole arg. */
11263 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11268 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
11269 rtx off = GEN_INT (i++ * 4);
11270 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11272 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
11274 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
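/* As an illustration (register numbers hypothetical): a DFmode argument
   with ALIGN_WORDS == 2 occupies two 32-bit words and is described as

     (parallel:DF [(expr_list (reg:SI 5) (const_int 0))
                   (expr_list (reg:SI 6) (const_int 4))])

   i.e. r5 carries bytes 0-3 of the double and r6 bytes 4-7.  */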
11277 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
11278 but must also be copied into the parameter save area starting at
11279 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
11280 to the GPRs and/or memory. Return the number of elements used. */
11283 rs6000_psave_function_arg (machine_mode mode, const_tree type,
11284 int align_words, rtx *rvec)
11288 if (align_words < GP_ARG_NUM_REG)
11290 int n_words = rs6000_arg_size (mode, type);
11292 if (align_words + n_words > GP_ARG_NUM_REG
11294 || (TARGET_32BIT && TARGET_POWERPC64))
11296 /* If this is partially on the stack, then we only
11297 include the portion actually in registers here. */
11298 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11301 if (align_words + n_words > GP_ARG_NUM_REG)
11303 /* Not all of the arg fits in gprs. Say that it goes in memory
11304 too, using a magic NULL_RTX component. Also see comment in
11305 rs6000_mixed_function_arg for why the normal
11306 function_arg_partial_nregs scheme doesn't work in this case. */
11307 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11312 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11313 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
11314 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11316 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11320 /* The whole arg fits in gprs. */
11321 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11322 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
11327 /* It's entirely in memory. */
11328 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11334 /* RVEC is a vector of K components of an argument of mode MODE.
11335 Construct the final function_arg return value from it. */
11338 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
11340 gcc_assert (k >= 1);
11342 /* Avoid returning a PARALLEL in the trivial cases. */
11345 if (XEXP (rvec[0], 0) == NULL_RTX)
11348 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
11349 return XEXP (rvec[0], 0);
11352 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11355 /* Determine where to put an argument to a function.
11356 Value is zero to push the argument on the stack,
11357 or a hard register in which to store the argument.
11359 MODE is the argument's machine mode.
11360 TYPE is the data type of the argument (as a tree).
11361 This is null for libcalls where that information may
11363 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11364 the preceding args and about the function being called. It is
11365 not modified in this routine.
11366 NAMED is nonzero if this argument is a named parameter
11367 (otherwise it is an extra parameter matching an ellipsis).
11369 On RS/6000 the first eight words of non-FP are normally in registers
11370 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
11371 Under V.4, the first 8 FP args are in registers.
11373 If this is floating-point and no prototype is specified, we use
11374 both an FP and integer register (or possibly FP reg and stack). Library
11375 functions (when CALL_LIBCALL is set) always have the proper types for args,
11376 so we can pass the FP value just in one register. emit_library_function
11377 doesn't support PARALLEL anyway.
11379 Note that for args passed by reference, function_arg will be called
11380 with MODE and TYPE set to that of the pointer to the arg, not the arg
11384 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
11385 const_tree type, bool named)
11387 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11388 enum rs6000_abi abi = DEFAULT_ABI;
11389 machine_mode elt_mode;
11392 /* Return a marker indicating whether the CR1 bit that V.4 uses to
11393 say fp args were passed in registers needs to be set or cleared.
11394 Assume that we don't need the marker for software floating point,
11395 or compiler generated library calls. */
11396 if (mode == VOIDmode)
11399 && (cum->call_cookie & CALL_LIBCALL) == 0
11401 || (cum->nargs_prototype < 0
11402 && (cum->prototype || TARGET_NO_PROTOTYPE)))
11403 && TARGET_HARD_FLOAT)
11404 return GEN_INT (cum->call_cookie
11405 | ((cum->fregno == FP_ARG_MIN_REG)
11406 ? CALL_V4_SET_FP_ARGS
11407 : CALL_V4_CLEAR_FP_ARGS));
11409 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
11412 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11414 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11416 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
11417 if (rslt != NULL_RTX)
11419 /* Else fall through to usual handling. */
11422 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11424 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11428 /* Do we also need to pass this argument in the parameter save area?
11429 Library support functions for IEEE 128-bit are assumed to not need the
11430 value passed both in GPRs and in vector registers. */
11431 if (TARGET_64BIT && !cum->prototype
11432 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11434 int align_words = ROUND_UP (cum->words, 2);
11435 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11438 /* Describe where this argument goes in the vector registers. */
11439 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
11441 r = gen_rtx_REG (elt_mode, cum->vregno + i);
11442 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11443 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11446 return rs6000_finish_function_arg (mode, rvec, k);
11448 else if (TARGET_ALTIVEC_ABI
11449 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
11450 || (type && TREE_CODE (type) == VECTOR_TYPE
11451 && int_size_in_bytes (type) == 16)))
11453 if (named || abi == ABI_V4)
11457 /* Vector parameters to varargs functions under AIX or Darwin
11458 get passed in memory and possibly also in GPRs. */
11459 int align, align_words, n_words;
11460 machine_mode part_mode;
11462 /* Vector parameters must be 16-byte aligned. In 32-bit
11463 mode this means we need to take into account the offset
11464 to the parameter save area. In 64-bit mode, they just
11465 have to start on an even word, since the parameter save
11466 area is 16-byte aligned. */
11468 align = -(rs6000_parm_offset () + cum->words) & 3;
11470 align = cum->words & 1;
11471 align_words = cum->words + align;
11473 /* Out of registers? Memory, then. */
11474 if (align_words >= GP_ARG_NUM_REG)
11477 if (TARGET_32BIT && TARGET_POWERPC64)
11478 return rs6000_mixed_function_arg (mode, type, align_words);
11480 /* The vector value goes in GPRs. Only the part of the
11481 value in GPRs is reported here. */
11483 n_words = rs6000_arg_size (mode, type);
11484 if (align_words + n_words > GP_ARG_NUM_REG)
11485 /* Fortunately, there are only two possibilities: the value
11486 is either wholly in GPRs or half in GPRs and half not. */
11487 part_mode = DImode;
11489 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
11493 else if (abi == ABI_V4)
11495 if (abi_v4_pass_in_fpr (mode, named))
11497 /* _Decimal128 must use an even/odd register pair. This assumes
11498 that the register number is odd when fregno is odd. */
11499 if (mode == TDmode && (cum->fregno % 2) == 1)
11502 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11503 <= FP_ARG_V4_MAX_REG)
11504 return gen_rtx_REG (mode, cum->fregno);
11510 int n_words = rs6000_arg_size (mode, type);
11511 int gregno = cum->sysv_gregno;
11513 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11514 As does any other 2 word item such as complex int due to a
11515 historical mistake. */
11517 gregno += (1 - gregno) & 1;
11519 /* Multi-reg args are not split between registers and stack. */
11520 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11523 if (TARGET_32BIT && TARGET_POWERPC64)
11524 return rs6000_mixed_function_arg (mode, type,
11525 gregno - GP_ARG_MIN_REG);
11526 return gen_rtx_REG (mode, gregno);
11531 int align_words = rs6000_parm_start (mode, type, cum->words);
11533 /* _Decimal128 must be passed in an even/odd float register pair.
11534 This assumes that the register number is odd when fregno is odd. */
11535 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11538 if (USE_FP_FOR_ARG_P (cum, elt_mode)
11539 && !(TARGET_AIX && !TARGET_ELF
11540 && type != NULL && AGGREGATE_TYPE_P (type)))
11542 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11545 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11548 /* Do we also need to pass this argument in the parameter
11550 if (type && (cum->nargs_prototype <= 0
11551 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11552 && TARGET_XL_COMPAT
11553 && align_words >= GP_ARG_NUM_REG)))
11554 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11556 /* Describe where this argument goes in the fprs. */
11557 for (i = 0; i < n_elts
11558 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
11560 /* Check if the argument is split over registers and memory.
11561 This can only ever happen for long double or _Decimal128;
11562 complex types are handled via split_complex_arg. */
11563 machine_mode fmode = elt_mode;
11564 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
11566 gcc_assert (FLOAT128_2REG_P (fmode));
11567 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
11570 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
11571 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11572 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11575 /* If there were not enough FPRs to hold the argument, the rest
11576 usually goes into memory. However, if the current position
11577 is still within the register parameter area, a portion may
11578 actually have to go into GPRs.
11580 Note that it may happen that the portion of the argument
11581 passed in the first "half" of the first GPR was already
11582 passed in the last FPR as well.
11584 For unnamed arguments, we already set up GPRs to cover the
11585 whole argument in rs6000_psave_function_arg, so there is
11586 nothing further to do at this point. */
11587 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
11588 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
11589 && cum->nargs_prototype > 0)
11591 static bool warned;
11593 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11594 int n_words = rs6000_arg_size (mode, type);
11596 align_words += fpr_words;
11597 n_words -= fpr_words;
11601 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11602 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
11603 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11605 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11607 if (!warned && warn_psabi)
11610 inform (input_location,
11611 "the ABI of passing homogeneous %<float%> aggregates"
11612 " has changed in GCC 5");
11616 return rs6000_finish_function_arg (mode, rvec, k);
11618 else if (align_words < GP_ARG_NUM_REG)
11620 if (TARGET_32BIT && TARGET_POWERPC64)
11621 return rs6000_mixed_function_arg (mode, type, align_words);
11623 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11630 /* For an arg passed partly in registers and partly in memory, this is
11631 the number of bytes passed in registers. For args passed entirely in
11632 registers or entirely in memory, zero. When an arg is described by a
11633 PARALLEL, perhaps using more than one register type, this function
11634 returns the number of bytes used by the first element of the PARALLEL. */
11637 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
11638 tree type, bool named)
11640 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11641 bool passed_in_gprs = true;
11644 machine_mode elt_mode;
11647 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11649 if (DEFAULT_ABI == ABI_V4)
11652 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11654 /* If we are passing this arg in the fixed parameter save area (gprs or
11655 memory) as well as VRs, we do not use the partial bytes mechanism;
11656 instead, rs6000_function_arg will return a PARALLEL including a memory
11657 element as necessary. Library support functions for IEEE 128-bit are
11658 assumed to not need the value passed both in GPRs and in vector
11660 if (TARGET_64BIT && !cum->prototype
11661 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11664 /* Otherwise, we pass in VRs only. Check for partial copies. */
11665 passed_in_gprs = false;
11666 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
11667 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
11670 /* In this complicated case we just disable the partial_nregs code. */
11671 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11674 align_words = rs6000_parm_start (mode, type, cum->words);
11676 if (USE_FP_FOR_ARG_P (cum, elt_mode)
11677 && !(TARGET_AIX && !TARGET_ELF
11678 && type != NULL && AGGREGATE_TYPE_P (type)))
11680 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11682 /* If we are passing this arg in the fixed parameter save area
11683 (gprs or memory) as well as FPRs, we do not use the partial
11684 bytes mechanism; instead, rs6000_function_arg will return a
11685 PARALLEL including a memory element as necessary. */
11687 && (cum->nargs_prototype <= 0
11688 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11689 && TARGET_XL_COMPAT
11690 && align_words >= GP_ARG_NUM_REG)))
11693 /* Otherwise, we pass in FPRs only. Check for partial copies. */
11694 passed_in_gprs = false;
11695 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
11697 /* Compute number of bytes / words passed in FPRs. If there
11698 is still space available in the register parameter area
11699 *after* that amount, a part of the argument will be passed
11700 in GPRs. In that case, the total amount passed in any
11701 registers is equal to the amount that would have been passed
11702 in GPRs if everything were passed there, so we fall back to
11703 the GPR code below to compute the appropriate value. */
11704 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
11705 * MIN (8, GET_MODE_SIZE (elt_mode)));
11706 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
11708 if (align_words + fpr_words < GP_ARG_NUM_REG)
11709 passed_in_gprs = true;
11716 && align_words < GP_ARG_NUM_REG
11717 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
11718 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
11720 if (ret != 0 && TARGET_DEBUG_ARG)
11721 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
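/* Worked example (64-bit, values illustrative): with GP_ARG_NUM_REG == 8,
   an aggregate needing three doublewords that starts at ALIGN_WORDS == 6
   puts two doublewords in the last two GPRs and one in memory, so the
   result is (8 - 6) * 8 == 16 bytes passed in registers.  */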
11726 /* A C expression that indicates when an argument must be passed by
11727 reference. If nonzero for an argument, a copy of that argument is
11728 made in memory and a pointer to the argument is passed instead of
11729 the argument itself. The pointer is passed in whatever way is
11730 appropriate for passing a pointer to that type.
11732 Under V.4, aggregates and long double are passed by reference.
11734 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
11735 reference unless the AltiVec vector extension ABI is in force.
11737 As an extension to all ABIs, variable sized types are passed by
11741 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
11742 machine_mode mode, const_tree type,
11743 bool named ATTRIBUTE_UNUSED)
11748 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11749 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11751 if (TARGET_DEBUG_ARG)
11752 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
11756 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
11758 if (TARGET_DEBUG_ARG)
11759 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
11763 if (int_size_in_bytes (type) < 0)
11765 if (TARGET_DEBUG_ARG)
11766 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
11770 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11771 modes only exist for GCC vector types if -maltivec. */
11772 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
11774 if (TARGET_DEBUG_ARG)
11775 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
11779 /* Pass synthetic vectors in memory. */
11780 if (TREE_CODE (type) == VECTOR_TYPE
11781 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11783 static bool warned_for_pass_big_vectors = false;
11784 if (TARGET_DEBUG_ARG)
11785 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
11786 if (!warned_for_pass_big_vectors)
11788 warning (OPT_Wpsabi, "GCC vector passed by reference: "
11789 "non-standard ABI extension with no compatibility "
11791 warned_for_pass_big_vectors = true;
11799 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
11800 already processed. Return true if the parameter must be passed
11801 (fully or partially) on the stack. */
11804 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
11810 /* Catch errors. */
11811 if (type == NULL || type == error_mark_node)
11814 /* Handle types with no storage requirement. */
11815 if (TYPE_MODE (type) == VOIDmode)
11818 /* Handle complex types; each recursive call below advances ARGS_SO_FAR past one component, so checking the element type twice is intentional. */
11819 if (TREE_CODE (type) == COMPLEX_TYPE)
11820 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
11821 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
11823 /* Handle transparent aggregates. */
11824 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
11825 && TYPE_TRANSPARENT_AGGR (type))
11826 type = TREE_TYPE (first_field (type));
11828 /* See if this arg was passed by invisible reference. */
11829 if (pass_by_reference (get_cumulative_args (args_so_far),
11830 TYPE_MODE (type), type, true))
11831 type = build_pointer_type (type);
11833 /* Find mode as it is passed by the ABI. */
11834 unsignedp = TYPE_UNSIGNED (type);
11835 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
11837 /* If we must pass in stack, we need a stack. */
11838 if (rs6000_must_pass_in_stack (mode, type))
11841 /* If there is no incoming register, we need a stack. */
11842 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
11843 if (entry_parm == NULL)
11846 /* Likewise if we need to pass both in registers and on the stack. */
11847 if (GET_CODE (entry_parm) == PARALLEL
11848 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
11851 /* Also true if we're partially in registers and partially not. */
11852 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
11855 /* Update info on where next arg arrives in registers. */
11856 rs6000_function_arg_advance (args_so_far, mode, type, true);
11860 /* Return true if FUN has no prototype, has a variable argument
11861 list, or passes any parameter in memory. */
11864 rs6000_function_parms_need_stack (tree fun, bool incoming)
11866 tree fntype, result;
11867 CUMULATIVE_ARGS args_so_far_v;
11868 cumulative_args_t args_so_far;
11871 /* Must be a libcall, all of which only use reg parms. */
11876 fntype = TREE_TYPE (fun);
11878 /* Varargs functions need the parameter save area. */
11879 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
11882 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
11883 args_so_far = pack_cumulative_args (&args_so_far_v);
11885 /* When incoming, we will have been passed the function decl.
11886 It is necessary to use the decl to handle K&R style functions,
11887 where TYPE_ARG_TYPES may not be available. */
11890 gcc_assert (DECL_P (fun));
11891 result = DECL_RESULT (fun);
11894 result = TREE_TYPE (fntype);
11896 if (result && aggregate_value_p (result, fntype))
11898 if (!TYPE_P (result))
11899 result = TREE_TYPE (result);
11900 result = build_pointer_type (result);
11901 rs6000_parm_needs_stack (args_so_far, result);
11908 for (parm = DECL_ARGUMENTS (fun);
11909 parm && parm != void_list_node;
11910 parm = TREE_CHAIN (parm))
11911 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
11916 function_args_iterator args_iter;
11919 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
11920 if (rs6000_parm_needs_stack (args_so_far, arg_type))
11927 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
11928 usually a constant depending on the ABI. However, in the ELFv2 ABI
11929 the register parameter area is optional when calling a function that
11930 has a prototype in scope, has no variable argument list, and passes
11931 all parameters in registers. */
11934 rs6000_reg_parm_stack_space (tree fun, bool incoming)
11936 int reg_parm_stack_space;
11938 switch (DEFAULT_ABI)
11941 reg_parm_stack_space = 0;
11946 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11950 /* ??? Recomputing this every time is a bit expensive. Is there
11951 a place to cache this information? */
11952 if (rs6000_function_parms_need_stack (fun, incoming))
11953 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11955 reg_parm_stack_space = 0;
11959 return reg_parm_stack_space;
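/* Concretely: 64-bit AIX always reserves 64 bytes (8 doublewords) here,
   ELFv2 reserves the same 64 bytes only when some parameter actually
   needs the save area and nothing otherwise, and V.4 never reserves
   any.  */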
11963 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
11966 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
11971 for (i = 0; i < nregs; i++)
11973 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
11974 if (reload_completed)
11976 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
11979 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
11980 i * GET_MODE_SIZE (reg_mode));
11983 tem = replace_equiv_address (tem, XEXP (tem, 0));
11987 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
11991 /* Perform any actions needed for a function that is receiving a
11992 variable number of arguments.
11996 MODE and TYPE are the mode and type of the current parameter.
11998 PRETEND_SIZE is a variable that should be set to the amount of stack
11999 that must be pushed by the prolog to pretend that our caller pushed
12002 Normally, this macro will push all remaining incoming registers on the
12003 stack and set PRETEND_SIZE to the length of the registers pushed. */
12006 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12007 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12010 CUMULATIVE_ARGS next_cum;
12011 int reg_size = TARGET_32BIT ? 4 : 8;
12012 rtx save_area = NULL_RTX, mem;
12013 int first_reg_offset;
12014 alias_set_type set;
12016 /* Skip the last named argument. */
12017 next_cum = *get_cumulative_args (cum);
12018 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12020 if (DEFAULT_ABI == ABI_V4)
12022 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12026 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12027 HOST_WIDE_INT offset = 0;
12029 /* Try to optimize the size of the varargs save area.
12030 The ABI requires that ap.reg_save_area is doubleword
12031 aligned, but we don't need to allocate space for all
12032 the bytes, only those to which we actually will save
12034 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12035 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12036 if (TARGET_HARD_FLOAT
12037 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12038 && cfun->va_list_fpr_size)
12041 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12042 * UNITS_PER_FP_WORD;
12043 if (cfun->va_list_fpr_size
12044 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12045 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12047 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12048 * UNITS_PER_FP_WORD;
12052 offset = -((first_reg_offset * reg_size) & ~7);
12053 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12055 gpr_reg_num = cfun->va_list_gpr_size;
12056 if (reg_size == 4 && (first_reg_offset & 1))
12059 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12062 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12063 * UNITS_PER_FP_WORD
12064 - (int) (GP_ARG_NUM_REG * reg_size);
12066 if (gpr_size + fpr_size)
12069 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12070 gcc_assert (MEM_P (reg_save_area));
12071 reg_save_area = XEXP (reg_save_area, 0);
12072 if (GET_CODE (reg_save_area) == PLUS)
12074 gcc_assert (XEXP (reg_save_area, 0)
12075 == virtual_stack_vars_rtx);
12076 gcc_assert (CONST_INT_P (XEXP (reg_save_area, 1)));
12077 offset += INTVAL (XEXP (reg_save_area, 1));
12080 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12083 cfun->machine->varargs_save_offset = offset;
12084 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
12089 first_reg_offset = next_cum.words;
12090 save_area = crtl->args.internal_arg_pointer;
12092 if (targetm.calls.must_pass_in_stack (mode, type))
12093 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
12096 set = get_varargs_alias_set ();
12097 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
12098 && cfun->va_list_gpr_size)
12100 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
12102 if (va_list_gpr_counter_field)
12103 /* V4 va_list_gpr_size counts number of registers needed. */
12104 n_gpr = cfun->va_list_gpr_size;
12106 /* char * va_list instead counts number of bytes needed. */
12107 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
12112 mem = gen_rtx_MEM (BLKmode,
12113 plus_constant (Pmode, save_area,
12114 first_reg_offset * reg_size));
12115 MEM_NOTRAP_P (mem) = 1;
12116 set_mem_alias_set (mem, set);
12117 set_mem_align (mem, BITS_PER_WORD);
12119 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
12123 /* Save FP registers if needed. */
12124 if (DEFAULT_ABI == ABI_V4
12125 && TARGET_HARD_FLOAT
12127 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12128 && cfun->va_list_fpr_size)
12130 int fregno = next_cum.fregno, nregs;
12131 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
12132 rtx lab = gen_label_rtx ();
12133 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
12134 * UNITS_PER_FP_WORD);
12137 (gen_rtx_SET (pc_rtx,
12138 gen_rtx_IF_THEN_ELSE (VOIDmode,
12139 gen_rtx_NE (VOIDmode, cr1,
12141 gen_rtx_LABEL_REF (VOIDmode, lab),
12145 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
12146 fregno++, off += UNITS_PER_FP_WORD, nregs++)
12148 mem = gen_rtx_MEM (TARGET_HARD_FLOAT ? DFmode : SFmode,
12149 plus_constant (Pmode, save_area, off));
12150 MEM_NOTRAP_P (mem) = 1;
12151 set_mem_alias_set (mem, set);
12152 set_mem_align (mem, GET_MODE_ALIGNMENT (
12153 TARGET_HARD_FLOAT ? DFmode : SFmode));
12154 emit_move_insn (mem, gen_rtx_REG (
12155 TARGET_HARD_FLOAT ? DFmode : SFmode, fregno));
12162 /* Create the va_list data type. */
12165 rs6000_build_builtin_va_list (void)
12167 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
12169 /* For AIX, prefer 'char *' because that's what the system
12170 header files like. */
12171 if (DEFAULT_ABI != ABI_V4)
12172 return build_pointer_type (char_type_node);
12174 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
12175 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
12176 get_identifier ("__va_list_tag"), record);
12178 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
12179 unsigned_char_type_node);
12180 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
12181 unsigned_char_type_node);
12182 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
12183 every user file. */
12184 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12185 get_identifier ("reserved"), short_unsigned_type_node);
12186 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12187 get_identifier ("overflow_arg_area"),
12189 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12190 get_identifier ("reg_save_area"),
12193 va_list_gpr_counter_field = f_gpr;
12194 va_list_fpr_counter_field = f_fpr;
12196 DECL_FIELD_CONTEXT (f_gpr) = record;
12197 DECL_FIELD_CONTEXT (f_fpr) = record;
12198 DECL_FIELD_CONTEXT (f_res) = record;
12199 DECL_FIELD_CONTEXT (f_ovf) = record;
12200 DECL_FIELD_CONTEXT (f_sav) = record;
12202 TYPE_STUB_DECL (record) = type_decl;
12203 TYPE_NAME (record) = type_decl;
12204 TYPE_FIELDS (record) = f_gpr;
12205 DECL_CHAIN (f_gpr) = f_fpr;
12206 DECL_CHAIN (f_fpr) = f_res;
12207 DECL_CHAIN (f_res) = f_ovf;
12208 DECL_CHAIN (f_ovf) = f_sav;
12210 layout_type (record);
12212 /* The correct type is an array type of one element. */
12213 return build_array_type (record, build_index_type (size_zero_node));
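/* For reference, the record built above corresponds roughly to this C
   declaration (a sketch; the authoritative layout is what layout_type
   produces):

     typedef struct __va_list_tag {
       unsigned char gpr;         // GPR argument registers consumed (0..8)
       unsigned char fpr;         // FPR argument registers consumed (0..8)
       unsigned short reserved;   // the named padding mentioned above
       void *overflow_arg_area;   // arguments that overflowed to memory
       void *reg_save_area;       // register save block set up by va_start
     } __va_list[1];  */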
12216 /* Implement va_start. */
12219 rs6000_va_start (tree valist, rtx nextarg)
12221 HOST_WIDE_INT words, n_gpr, n_fpr;
12222 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12223 tree gpr, fpr, ovf, sav, t;
12225 /* Only SVR4 needs something special. */
12226 if (DEFAULT_ABI != ABI_V4)
12228 std_expand_builtin_va_start (valist, nextarg);
12232 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12233 f_fpr = DECL_CHAIN (f_gpr);
12234 f_res = DECL_CHAIN (f_fpr);
12235 f_ovf = DECL_CHAIN (f_res);
12236 f_sav = DECL_CHAIN (f_ovf);
12238 valist = build_simple_mem_ref (valist);
12239 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12240 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12242 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12244 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12247 /* Count number of gp and fp argument registers used. */
12248 words = crtl->args.info.words;
12249 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
12251 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
12254 if (TARGET_DEBUG_ARG)
12255 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
12256 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
12257 words, n_gpr, n_fpr);
12259 if (cfun->va_list_gpr_size)
12261 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12262 build_int_cst (NULL_TREE, n_gpr));
12263 TREE_SIDE_EFFECTS (t) = 1;
12264 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12267 if (cfun->va_list_fpr_size)
12269 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12270 build_int_cst (NULL_TREE, n_fpr));
12271 TREE_SIDE_EFFECTS (t) = 1;
12272 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12274 #ifdef HAVE_AS_GNU_ATTRIBUTE
12275 if (call_ABI_of_interest (cfun->decl))
12276 rs6000_passes_float = true;
12280 /* Find the overflow area. */
12281 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
12283 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
12284 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12285 TREE_SIDE_EFFECTS (t) = 1;
12286 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12288 /* If there were no va_arg invocations, don't set up the register
12290 if (!cfun->va_list_gpr_size
12291 && !cfun->va_list_fpr_size
12292 && n_gpr < GP_ARG_NUM_REG
12293 && n_fpr < FP_ARG_V4_MAX_REG)
12296 /* Find the register save area. */
12297 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
12298 if (cfun->machine->varargs_save_offset)
12299 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
12300 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12301 TREE_SIDE_EFFECTS (t) = 1;
12302 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
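/* In effect, the trees expanded above implement this pseudo-C for a
   varargs function whose named arguments consumed n_gpr gprs, n_fpr fprs,
   and `words' stack words (a sketch; the actual code comes from the
   expand_expr calls above):

       ap->gpr = n_gpr;
       ap->fpr = n_fpr;
       ap->overflow_arg_area
         = incoming_arg_pointer + words * MIN_UNITS_PER_WORD;
       ap->reg_save_area = frame_base + varargs_save_offset;

   The counter stores are skipped when cfun says no va_arg use needs them,
   and the reg_save_area store is skipped entirely when no va_arg
   invocation can need it.  */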
12305 /* Implement va_arg. */
12308 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12309 gimple_seq *post_p)
12311 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12312 tree gpr, fpr, ovf, sav, reg, t, u;
12313 int size, rsize, n_reg, sav_ofs, sav_scale;
12314 tree lab_false, lab_over, addr;
12316 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
12320 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12322 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
12323 return build_va_arg_indirect_ref (t);
12326 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
12327 earlier version of gcc, with the property that it always applied alignment
12328 adjustments to the va-args (even for zero-sized types). The cheapest way
12329 to deal with this is to replicate the effect of the part of
12330 std_gimplify_va_arg_expr that carries out the align adjust, for the case
12332 We don't need to check for pass-by-reference because of the test above.
12333 We can return a simplified answer, since we know there's no offset to add. */
12336 && rs6000_darwin64_abi)
12337 || DEFAULT_ABI == ABI_ELFv2
12338 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
12339 && integer_zerop (TYPE_SIZE (type)))
12341 unsigned HOST_WIDE_INT align, boundary;
12342 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
12343 align = PARM_BOUNDARY / BITS_PER_UNIT;
12344 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
12345 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
12346 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
12347 boundary /= BITS_PER_UNIT;
12348 if (boundary > align)
12351 /* This updates arg ptr by the amount that would be necessary
12352 to align the zero-sized (but not zero-alignment) item. */
12353 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12354 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
12355 gimplify_and_add (t, pre_p);
12357 t = fold_convert (sizetype, valist_tmp);
12358 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12359 fold_convert (TREE_TYPE (valist),
12360 fold_build2 (BIT_AND_EXPR, sizetype, t,
12361 size_int (-boundary))));
12362 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
12363 gimplify_and_add (t, pre_p);
12365 /* Since it is zero-sized there's no increment for the item itself. */
12366 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
12367 return build_va_arg_indirect_ref (valist_tmp);
12370 if (DEFAULT_ABI != ABI_V4)
12372 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
12374 tree elem_type = TREE_TYPE (type);
12375 machine_mode elem_mode = TYPE_MODE (elem_type);
12376 int elem_size = GET_MODE_SIZE (elem_mode);
12378 if (elem_size < UNITS_PER_WORD)
12380 tree real_part, imag_part;
12381 gimple_seq post = NULL;
12383 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12385 /* Copy the value into a temporary, lest the formal temporary
12386 be reused out from under us. */
12387 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
12388 gimple_seq_add_seq (pre_p, post);
12390 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12393 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
12397 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
12400 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12401 f_fpr = DECL_CHAIN (f_gpr);
12402 f_res = DECL_CHAIN (f_fpr);
12403 f_ovf = DECL_CHAIN (f_res);
12404 f_sav = DECL_CHAIN (f_ovf);
12406 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12407 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12409 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12411 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12414 size = int_size_in_bytes (type);
12415 rsize = (size + 3) / 4;
12416 int pad = 4 * rsize - size;
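  /* Worked example of the two values just computed: for a 6-byte struct,
     size = 6, rsize = (6 + 3) / 4 = 2 word slots, and pad = 4*2 - 6 = 2;
     on big-endian targets the code below advances the address by those
     2 bytes so the value ends up right-aligned in its slots.  */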
12419 machine_mode mode = TYPE_MODE (type);
12420 if (abi_v4_pass_in_fpr (mode, false))
12422 /* FP args go in FP registers, if present. */
12424 n_reg = (size + 7) / 8;
12425 sav_ofs = (TARGET_HARD_FLOAT ? 8 : 4) * 4;
12426 sav_scale = (TARGET_HARD_FLOAT ? 8 : 4);
12427 if (mode != SFmode && mode != SDmode)
12432 /* Otherwise into GP registers. */
12441 /* Pull the value out of the saved registers.... */
12444 addr = create_tmp_var (ptr_type_node, "addr");
12446 /* AltiVec vectors never go in registers when -mabi=altivec. */
12447 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12451 lab_false = create_artificial_label (input_location);
12452 lab_over = create_artificial_label (input_location);
12454 /* Long long is aligned in the registers, as is any other two-gpr
12455 item such as complex int, due to a historical mistake. */
12457 if (n_reg == 2 && reg == gpr)
12460 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12461 build_int_cst (TREE_TYPE (reg), n_reg - 1));
12462 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
12463 unshare_expr (reg), u);
12465 /* _Decimal128 is passed in even/odd fpr pairs; the stored
12466 reg number is 0 for f1, so we want to make it odd. */
12467 else if (reg == fpr && mode == TDmode)
12469 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12470 build_int_cst (TREE_TYPE (reg), 1));
12471 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
12474 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
12475 t = build2 (GE_EXPR, boolean_type_node, u, t);
12476 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12477 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12478 gimplify_and_add (t, pre_p);
12482 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12484 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12485 build_int_cst (TREE_TYPE (reg), n_reg));
12486 u = fold_convert (sizetype, u);
12487 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
12488 t = fold_build_pointer_plus (t, u);
12490 /* _Decimal32 varargs are located in the second word of the 64-bit
12491 FP register for 32-bit binaries. */
12492 if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
12493 t = fold_build_pointer_plus_hwi (t, size);
12495 /* Args are passed right-aligned. */
12496 if (BYTES_BIG_ENDIAN)
12497 t = fold_build_pointer_plus_hwi (t, pad);
12499 gimplify_assign (addr, t, pre_p);
12501 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12503 stmt = gimple_build_label (lab_false);
12504 gimple_seq_add_stmt (pre_p, stmt);
12506 if ((n_reg == 2 && !regalign) || n_reg > 2)
12508 /* Ensure that we don't find any more args in regs.
12509 Alignment has been taken care of for special cases. */
12510 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
12514 /* ... otherwise out of the overflow area. */
12516 /* Care for on-stack alignment if needed. */
12520 t = fold_build_pointer_plus_hwi (t, align - 1);
12521 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12522 build_int_cst (TREE_TYPE (t), -align));
12525 /* Args are passed right-aligned. */
12526 if (BYTES_BIG_ENDIAN)
12527 t = fold_build_pointer_plus_hwi (t, pad);
12529 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12531 gimplify_assign (unshare_expr (addr), t, pre_p);
12533 t = fold_build_pointer_plus_hwi (t, size);
12534 gimplify_assign (unshare_expr (ovf), t, pre_p);
12538 stmt = gimple_build_label (lab_over);
12539 gimple_seq_add_stmt (pre_p, stmt);
12542 if (STRICT_ALIGNMENT
12543 && (TYPE_ALIGN (type)
12544 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
12546 /* The value (of type complex double, for example) may not be
12547 aligned in memory in the saved registers, so copy via a
12548 temporary. (This is the same code as used for SPARC.) */
12549 tree tmp = create_tmp_var (type, "va_arg_tmp");
12550 tree dest_addr = build_fold_addr_expr (tmp);
12552 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
12553 3, dest_addr, addr, size_int (rsize * 4));
12554 TREE_ADDRESSABLE (tmp) = 1;
12556 gimplify_and_add (copy, pre_p);
12560 addr = fold_convert (ptrtype, addr);
12561 return build_va_arg_indirect_ref (addr);
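/* For a plain int under the V4 ABI, the GIMPLE emitted above behaves
   roughly like the following pseudo-C (an illustration only; FP
   arguments, register-pair alignment, and big-endian padding are
   handled above but omitted here):

       if (ap->gpr >= 8)
         goto on_stack;
       addr = ap->reg_save_area + ap->gpr * 4;
       ap->gpr++;
       goto done;
     on_stack:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area = addr + 4;
     done:
       result = *(int *) addr;  */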
12567 def_builtin (const char *name, tree type, enum rs6000_builtins code)
12570 unsigned classify = rs6000_builtin_info[(int)code].attr;
12571 const char *attr_string = "";
12573 gcc_assert (name != NULL);
12574 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT - 1));
12576 if (rs6000_builtin_decls[(int)code])
12577 fatal_error (input_location,
12578 "internal error: builtin function %qs already processed",
12581 rs6000_builtin_decls[(int)code] = t =
12582 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
12584 /* Set any special attributes. */
12585 if ((classify & RS6000_BTC_CONST) != 0)
12587 /* const function, function only depends on the inputs. */
12588 TREE_READONLY (t) = 1;
12589 TREE_NOTHROW (t) = 1;
12590 attr_string = ", const";
12592 else if ((classify & RS6000_BTC_PURE) != 0)
12594 /* pure function, function can read global memory, but does not set any global state. */
12596 DECL_PURE_P (t) = 1;
12597 TREE_NOTHROW (t) = 1;
12598 attr_string = ", pure";
12600 else if ((classify & RS6000_BTC_FP) != 0)
12602 /* Function is a math function. If -frounding-math is in effect, treat the
12603 function as not reading global memory, but it can have arbitrary side
12604 effects. If it is off, then assume the function is a const function.
12605 This mimics the ATTR_MATHFN_FPROUNDING attribute in
12606 builtin-attribute.def that is used for the math functions. */
12607 TREE_NOTHROW (t) = 1;
12608 if (flag_rounding_math)
12610 DECL_PURE_P (t) = 1;
12611 DECL_IS_NOVOPS (t) = 1;
12612 attr_string = ", fp, pure";
12616 TREE_READONLY (t) = 1;
12617 attr_string = ", fp, const";
12620 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
12621 gcc_unreachable ();
12623 if (TARGET_DEBUG_BUILTIN)
12624 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
12625 (int)code, name, attr_string);
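/* Typical use, as made from the rs6000 builtin-registration code (the
   type name here is illustrative of the _ftype_ variables built there,
   not a fixed API):

       def_builtin ("__builtin_altivec_vaddubm",
                    v16qi_ftype_v16qi_v16qi, ALTIVEC_BUILTIN_VADDUBM);

   This records the decl in rs6000_builtin_decls and marks it const,
   pure, or fp according to the attr field of rs6000_builtin_info.  */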
12628 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
12630 #undef RS6000_BUILTIN_0
12631 #undef RS6000_BUILTIN_1
12632 #undef RS6000_BUILTIN_2
12633 #undef RS6000_BUILTIN_3
12634 #undef RS6000_BUILTIN_A
12635 #undef RS6000_BUILTIN_D
12636 #undef RS6000_BUILTIN_H
12637 #undef RS6000_BUILTIN_P
12638 #undef RS6000_BUILTIN_X
12640 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12641 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12642 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12643 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
12644 { MASK, ICODE, NAME, ENUM },
12646 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12647 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12648 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12649 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12650 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12652 static const struct builtin_description bdesc_3arg[] =
12654 #include "rs6000-builtin.def"
12657 /* DST operations: void foo (void *, const int, const char). */
12659 #undef RS6000_BUILTIN_0
12660 #undef RS6000_BUILTIN_1
12661 #undef RS6000_BUILTIN_2
12662 #undef RS6000_BUILTIN_3
12663 #undef RS6000_BUILTIN_A
12664 #undef RS6000_BUILTIN_D
12665 #undef RS6000_BUILTIN_H
12666 #undef RS6000_BUILTIN_P
12667 #undef RS6000_BUILTIN_X
12669 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12670 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12671 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12672 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12673 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12674 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
12675 { MASK, ICODE, NAME, ENUM },
12677 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12678 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12679 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12681 static const struct builtin_description bdesc_dst[] =
12683 #include "rs6000-builtin.def"
12686 /* Simple binary operations: VECc = foo (VECa, VECb). */
12688 #undef RS6000_BUILTIN_0
12689 #undef RS6000_BUILTIN_1
12690 #undef RS6000_BUILTIN_2
12691 #undef RS6000_BUILTIN_3
12692 #undef RS6000_BUILTIN_A
12693 #undef RS6000_BUILTIN_D
12694 #undef RS6000_BUILTIN_H
12695 #undef RS6000_BUILTIN_P
12696 #undef RS6000_BUILTIN_X
12698 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12699 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12700 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
12701 { MASK, ICODE, NAME, ENUM },
12703 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12704 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12705 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12706 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12707 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12708 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12710 static const struct builtin_description bdesc_2arg[] =
12712 #include "rs6000-builtin.def"
12715 #undef RS6000_BUILTIN_0
12716 #undef RS6000_BUILTIN_1
12717 #undef RS6000_BUILTIN_2
12718 #undef RS6000_BUILTIN_3
12719 #undef RS6000_BUILTIN_A
12720 #undef RS6000_BUILTIN_D
12721 #undef RS6000_BUILTIN_H
12722 #undef RS6000_BUILTIN_P
12723 #undef RS6000_BUILTIN_X
12725 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12726 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12727 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12728 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12729 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12730 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12731 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12732 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
12733 { MASK, ICODE, NAME, ENUM },
12735 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12737 /* AltiVec predicates. */
12739 static const struct builtin_description bdesc_altivec_preds[] =
12741 #include "rs6000-builtin.def"
12744 /* ABS* operations. */
12746 #undef RS6000_BUILTIN_0
12747 #undef RS6000_BUILTIN_1
12748 #undef RS6000_BUILTIN_2
12749 #undef RS6000_BUILTIN_3
12750 #undef RS6000_BUILTIN_A
12751 #undef RS6000_BUILTIN_D
12752 #undef RS6000_BUILTIN_H
12753 #undef RS6000_BUILTIN_P
12754 #undef RS6000_BUILTIN_X
12756 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12757 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12758 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12759 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12760 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
12761 { MASK, ICODE, NAME, ENUM },
12763 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12764 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12765 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12766 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12768 static const struct builtin_description bdesc_abs[] =
12770 #include "rs6000-builtin.def"
12773 /* Simple unary operations: VECb = foo (unsigned literal) or VECb = foo (VECa). */
12776 #undef RS6000_BUILTIN_0
12777 #undef RS6000_BUILTIN_1
12778 #undef RS6000_BUILTIN_2
12779 #undef RS6000_BUILTIN_3
12780 #undef RS6000_BUILTIN_A
12781 #undef RS6000_BUILTIN_D
12782 #undef RS6000_BUILTIN_H
12783 #undef RS6000_BUILTIN_P
12784 #undef RS6000_BUILTIN_X
12786 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12787 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
12788 { MASK, ICODE, NAME, ENUM },
12790 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12791 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12792 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12793 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12794 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12795 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12796 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12798 static const struct builtin_description bdesc_1arg[] =
12800 #include "rs6000-builtin.def"
12803 /* Simple no-argument operations: result = __builtin_darn_32 (). */
12805 #undef RS6000_BUILTIN_0
12806 #undef RS6000_BUILTIN_1
12807 #undef RS6000_BUILTIN_2
12808 #undef RS6000_BUILTIN_3
12809 #undef RS6000_BUILTIN_A
12810 #undef RS6000_BUILTIN_D
12811 #undef RS6000_BUILTIN_H
12812 #undef RS6000_BUILTIN_P
12813 #undef RS6000_BUILTIN_X
12815 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
12816 { MASK, ICODE, NAME, ENUM },
12818 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12819 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12820 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12821 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12822 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12823 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12824 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12825 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12827 static const struct builtin_description bdesc_0arg[] =
12829 #include "rs6000-builtin.def"
12832 /* HTM builtins. */
12833 #undef RS6000_BUILTIN_0
12834 #undef RS6000_BUILTIN_1
12835 #undef RS6000_BUILTIN_2
12836 #undef RS6000_BUILTIN_3
12837 #undef RS6000_BUILTIN_A
12838 #undef RS6000_BUILTIN_D
12839 #undef RS6000_BUILTIN_H
12840 #undef RS6000_BUILTIN_P
12841 #undef RS6000_BUILTIN_X
12843 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12844 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12845 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12846 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12847 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12848 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12849 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
12850 { MASK, ICODE, NAME, ENUM },
12852 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12853 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12855 static const struct builtin_description bdesc_htm[] =
12857 #include "rs6000-builtin.def"
12860 #undef RS6000_BUILTIN_0
12861 #undef RS6000_BUILTIN_1
12862 #undef RS6000_BUILTIN_2
12863 #undef RS6000_BUILTIN_3
12864 #undef RS6000_BUILTIN_A
12865 #undef RS6000_BUILTIN_D
12866 #undef RS6000_BUILTIN_H
12867 #undef RS6000_BUILTIN_P
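/* How the bdesc_* tables above are populated: rs6000-builtin.def is
   included once per table, each time with exactly one RS6000_BUILTIN_*
   macro defined to emit an initializer and all the others defined to
   expand to nothing.  A (hypothetical) two-operand entry such as

       RS6000_BUILTIN_2 (ALTIVEC_BUILTIN_VADDUBM,
                         "__builtin_altivec_vaddubm",
                         RS6000_BTM_ALTIVEC, RS6000_BTC_BINARY,
                         CODE_FOR_addv16qi3)

   therefore contributes "{ MASK, ICODE, NAME, ENUM }," to bdesc_2arg
   and nothing at all to every other table.  */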
12869 /* Return true if a builtin function is overloaded. */
12871 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
12873 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
12877 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
12879 return rs6000_builtin_info[(int)fncode].name;
12882 /* Expand an expression EXP that calls a builtin without arguments. */
12884 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
12887 machine_mode tmode = insn_data[icode].operand[0].mode;
12889 if (icode == CODE_FOR_nothing)
12890 /* Builtin not supported on this processor. */
12893 if (icode == CODE_FOR_rs6000_mffsl
12894 && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
12896 error ("%<__builtin_mffsl%> not supported with %<-msoft-float%>");
12901 || GET_MODE (target) != tmode
12902 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12903 target = gen_reg_rtx (tmode);
12905 pat = GEN_FCN (icode) (target);
12915 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
12918 tree arg0 = CALL_EXPR_ARG (exp, 0);
12919 tree arg1 = CALL_EXPR_ARG (exp, 1);
12920 rtx op0 = expand_normal (arg0);
12921 rtx op1 = expand_normal (arg1);
12922 machine_mode mode0 = insn_data[icode].operand[0].mode;
12923 machine_mode mode1 = insn_data[icode].operand[1].mode;
12925 if (icode == CODE_FOR_nothing)
12926 /* Builtin not supported on this processor. */
12929 /* If we got invalid arguments bail out before generating bad rtl. */
12930 if (arg0 == error_mark_node || arg1 == error_mark_node)
12933 if (!CONST_INT_P (op0)
12934 || INTVAL (op0) > 255
12935 || INTVAL (op0) < 0)
12937 error ("argument 1 must be an 8-bit field value");
12941 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12942 op0 = copy_to_mode_reg (mode0, op0);
12944 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12945 op1 = copy_to_mode_reg (mode1, op1);
12947 pat = GEN_FCN (icode) (op0, op1);
12956 rs6000_expand_mtfsb_builtin (enum insn_code icode, tree exp)
12959 tree arg0 = CALL_EXPR_ARG (exp, 0);
12960 rtx op0 = expand_normal (arg0);
12962 if (icode == CODE_FOR_nothing)
12963 /* Builtin not supported on this processor. */
12966 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
12968 error ("%<__builtin_mtfsb0%> and %<__builtin_mtfsb1%> not supported with "
12969 "%<-msoft-float%>");
12973 /* If we got invalid arguments bail out before generating bad rtl. */
12974 if (arg0 == error_mark_node)
12977 /* Only allow bit numbers 0 to 31. */
12978 if (!u5bit_cint_operand (op0, VOIDmode))
12980 error ("Argument must be a constant between 0 and 31.");
12984 pat = GEN_FCN (icode) (op0);
12993 rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp)
12996 tree arg0 = CALL_EXPR_ARG (exp, 0);
12997 rtx op0 = expand_normal (arg0);
12998 machine_mode mode0 = insn_data[icode].operand[0].mode;
13000 if (icode == CODE_FOR_nothing)
13001 /* Builtin not supported on this processor. */
13004 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13006 error ("%<__builtin_set_fpscr_rn%> not supported with %<-msoft-float%>");
13010 /* If we got invalid arguments bail out before generating bad rtl. */
13011 if (arg0 == error_mark_node)
13014 /* If the argument is a constant, check the range. Argument can only be a
13015 2-bit value. Unfortunately, we cannot check the range of the value at
13016 compile time if the argument is a variable. The least significant two
13017 bits of the argument, regardless of type, are used to set the rounding
13018 mode. All other bits are ignored. */
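  /* For the reader: the ISA defines the FPSCR.RN encoding as 0 = round
     to nearest even, 1 = round toward zero, 2 = round toward +infinity,
     and 3 = round toward -infinity, so for example

         __builtin_set_fpscr_rn (1);

     switches the thread to truncating rounding.  */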
13019 if (CONST_INT_P (op0) && !const_0_to_3_operand(op0, VOIDmode))
13021 error ("Argument must be a value between 0 and 3.");
13025 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13026 op0 = copy_to_mode_reg (mode0, op0);
13028 pat = GEN_FCN (icode) (op0);
13036 rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp)
13039 tree arg0 = CALL_EXPR_ARG (exp, 0);
13040 rtx op0 = expand_normal (arg0);
13041 machine_mode mode0 = insn_data[icode].operand[0].mode;
13044 /* Builtin not supported in 32-bit mode. */
13045 fatal_error (input_location,
13046 "%<__builtin_set_fpscr_drn%> is not supported "
13049 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13051 error ("%<__builtin_set_fpscr_drn%> not supported with %<-msoft-float%>");
13055 if (icode == CODE_FOR_nothing)
13056 /* Builtin not supported on this processor. */
13059 /* If we got invalid arguments bail out before generating bad rtl. */
13060 if (arg0 == error_mark_node)
13063 /* If the argument is a constant, check the range. The argument can only be
13064 a 3-bit value. Unfortunately, we cannot check the range of the value at
13065 compile time if the argument is a variable. The least significant three
13066 bits of the argument, regardless of type, are used to set the rounding
13067 mode. All other bits are ignored. */
13068 if (CONST_INT_P (op0) && !const_0_to_7_operand(op0, VOIDmode))
13070 error ("Argument must be a value between 0 and 7.");
13074 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13075 op0 = copy_to_mode_reg (mode0, op0);
13077 pat = GEN_FCN (icode) (op0);
13086 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13089 tree arg0 = CALL_EXPR_ARG (exp, 0);
13090 rtx op0 = expand_normal (arg0);
13091 machine_mode tmode = insn_data[icode].operand[0].mode;
13092 machine_mode mode0 = insn_data[icode].operand[1].mode;
13094 if (icode == CODE_FOR_nothing)
13095 /* Builtin not supported on this processor. */
13098 /* If we got invalid arguments bail out before generating bad rtl. */
13099 if (arg0 == error_mark_node)
13102 if (icode == CODE_FOR_altivec_vspltisb
13103 || icode == CODE_FOR_altivec_vspltish
13104 || icode == CODE_FOR_altivec_vspltisw)
13106 /* Only allow 5-bit *signed* literals. */
13107 if (!CONST_INT_P (op0)
13108 || INTVAL (op0) > 15
13109 || INTVAL (op0) < -16)
13111 error ("argument 1 must be a 5-bit signed literal");
13112 return CONST0_RTX (tmode);
13117 || GET_MODE (target) != tmode
13118 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13119 target = gen_reg_rtx (tmode);
13121 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13122 op0 = copy_to_mode_reg (mode0, op0);
13124 pat = GEN_FCN (icode) (target, op0);
13133 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
13135 rtx pat, scratch1, scratch2;
13136 tree arg0 = CALL_EXPR_ARG (exp, 0);
13137 rtx op0 = expand_normal (arg0);
13138 machine_mode tmode = insn_data[icode].operand[0].mode;
13139 machine_mode mode0 = insn_data[icode].operand[1].mode;
13141 /* If we have invalid arguments, bail out before generating bad rtl. */
13142 if (arg0 == error_mark_node)
13146 || GET_MODE (target) != tmode
13147 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13148 target = gen_reg_rtx (tmode);
13150 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13151 op0 = copy_to_mode_reg (mode0, op0);
13153 scratch1 = gen_reg_rtx (mode0);
13154 scratch2 = gen_reg_rtx (mode0);
13156 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
13165 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
13168 tree arg0 = CALL_EXPR_ARG (exp, 0);
13169 tree arg1 = CALL_EXPR_ARG (exp, 1);
13170 rtx op0 = expand_normal (arg0);
13171 rtx op1 = expand_normal (arg1);
13172 machine_mode tmode = insn_data[icode].operand[0].mode;
13173 machine_mode mode0 = insn_data[icode].operand[1].mode;
13174 machine_mode mode1 = insn_data[icode].operand[2].mode;
13176 if (icode == CODE_FOR_nothing)
13177 /* Builtin not supported on this processor. */
13180 /* If we got invalid arguments bail out before generating bad rtl. */
13181 if (arg0 == error_mark_node || arg1 == error_mark_node)
13184 if (icode == CODE_FOR_unpackv1ti
13185 || icode == CODE_FOR_unpackkf
13186 || icode == CODE_FOR_unpacktf
13187 || icode == CODE_FOR_unpackif
13188 || icode == CODE_FOR_unpacktd)
13190 /* Only allow 1-bit unsigned literals. */
13192 if (TREE_CODE (arg1) != INTEGER_CST
13193 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
13195 error ("argument 2 must be a 1-bit unsigned literal");
13196 return CONST0_RTX (tmode);
13199 else if (icode == CODE_FOR_altivec_vspltw)
13201 /* Only allow 2-bit unsigned literals. */
13203 if (TREE_CODE (arg1) != INTEGER_CST
13204 || TREE_INT_CST_LOW (arg1) & ~3)
13206 error ("argument 2 must be a 2-bit unsigned literal");
13207 return CONST0_RTX (tmode);
13210 else if (icode == CODE_FOR_altivec_vsplth)
13212 /* Only allow 3-bit unsigned literals. */
13214 if (TREE_CODE (arg1) != INTEGER_CST
13215 || TREE_INT_CST_LOW (arg1) & ~7)
13217 error ("argument 2 must be a 3-bit unsigned literal");
13218 return CONST0_RTX (tmode);
13221 else if (icode == CODE_FOR_altivec_vspltb)
13223 /* Only allow 4-bit unsigned literals. */
13225 if (TREE_CODE (arg1) != INTEGER_CST
13226 || TREE_INT_CST_LOW (arg1) & ~15)
13228 error ("argument 2 must be a 4-bit unsigned literal");
13229 return CONST0_RTX (tmode);
13232 else if (icode == CODE_FOR_altivec_vcfux
13233 || icode == CODE_FOR_altivec_vcfsx
13234 || icode == CODE_FOR_altivec_vctsxs
13235 || icode == CODE_FOR_altivec_vctuxs)
13237 /* Only allow 5-bit unsigned literals. */
13239 if (TREE_CODE (arg1) != INTEGER_CST
13240 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13242 error ("argument 2 must be a 5-bit unsigned literal");
13243 return CONST0_RTX (tmode);
13246 else if (icode == CODE_FOR_dfptstsfi_eq_dd
13247 || icode == CODE_FOR_dfptstsfi_lt_dd
13248 || icode == CODE_FOR_dfptstsfi_gt_dd
13249 || icode == CODE_FOR_dfptstsfi_unordered_dd
13250 || icode == CODE_FOR_dfptstsfi_eq_td
13251 || icode == CODE_FOR_dfptstsfi_lt_td
13252 || icode == CODE_FOR_dfptstsfi_gt_td
13253 || icode == CODE_FOR_dfptstsfi_unordered_td)
13255 /* Only allow 6-bit unsigned literals. */
13257 if (TREE_CODE (arg0) != INTEGER_CST
13258 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
13260 error ("argument 1 must be a 6-bit unsigned literal");
13261 return CONST0_RTX (tmode);
13264 else if (icode == CODE_FOR_xststdcqp_kf
13265 || icode == CODE_FOR_xststdcqp_tf
13266 || icode == CODE_FOR_xststdcdp
13267 || icode == CODE_FOR_xststdcsp
13268 || icode == CODE_FOR_xvtstdcdp
13269 || icode == CODE_FOR_xvtstdcsp)
13271 /* Only allow 7-bit unsigned literals. */
13273 if (TREE_CODE (arg1) != INTEGER_CST
13274 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
13276 error ("argument 2 must be a 7-bit unsigned literal");
13277 return CONST0_RTX (tmode);
13282 || GET_MODE (target) != tmode
13283 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13284 target = gen_reg_rtx (tmode);
13286 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13287 op0 = copy_to_mode_reg (mode0, op0);
13288 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13289 op1 = copy_to_mode_reg (mode1, op1);
13291 pat = GEN_FCN (icode) (target, op0, op1);
13300 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13303 tree cr6_form = CALL_EXPR_ARG (exp, 0);
13304 tree arg0 = CALL_EXPR_ARG (exp, 1);
13305 tree arg1 = CALL_EXPR_ARG (exp, 2);
13306 rtx op0 = expand_normal (arg0);
13307 rtx op1 = expand_normal (arg1);
13308 machine_mode tmode = SImode;
13309 machine_mode mode0 = insn_data[icode].operand[1].mode;
13310 machine_mode mode1 = insn_data[icode].operand[2].mode;
13313 if (TREE_CODE (cr6_form) != INTEGER_CST)
13315 error ("argument 1 of %qs must be a constant",
13316 "__builtin_altivec_predicate");
13320 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
13322 gcc_assert (mode0 == mode1);
13324 /* If we have invalid arguments, bail out before generating bad rtl. */
13325 if (arg0 == error_mark_node || arg1 == error_mark_node)
13329 || GET_MODE (target) != tmode
13330 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13331 target = gen_reg_rtx (tmode);
13333 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13334 op0 = copy_to_mode_reg (mode0, op0);
13335 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13336 op1 = copy_to_mode_reg (mode1, op1);
13338 /* Note that for many of the relevant operations (e.g. cmpne or
13339 cmpeq) with float or double operands, it makes more sense for the
13340 mode of the allocated scratch register to select a vector of
13341 integer. But the choice to copy the mode of operand 0 was made
13342 long ago and there are no plans to change it. */
13343 scratch = gen_reg_rtx (mode0);
13345 pat = GEN_FCN (icode) (scratch, op0, op1);
13350 /* The vec_any* and vec_all* predicates use the same opcodes for two
13351 different operations, but the bits in CR6 will be different
13352 depending on what information we want. So we have to play tricks
13353 with CR6 to get the right bits out.
13355 If you think this is disgusting, look at the specs for the
13356 AltiVec predicates. */
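  /* Concretely (an illustration based on the __CR6_* values used by
     altivec.h): vec_all_eq (a, b) expands to this builtin with cr6_form
     __CR6_LT (2), testing the "all elements compared true" bit of CR6,
     while vec_any_eq (a, b) uses __CR6_EQ_REV (1), testing the
     complement of the "all elements compared false" bit; both run the
     very same vcmpequ. instruction.  */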
13358 switch (cr6_form_int)
13361 emit_insn (gen_cr6_test_for_zero (target));
13364 emit_insn (gen_cr6_test_for_zero_reverse (target));
13367 emit_insn (gen_cr6_test_for_lt (target));
13370 emit_insn (gen_cr6_test_for_lt_reverse (target));
13373 error ("argument 1 of %qs is out of range",
13374 "__builtin_altivec_predicate");
13382 swap_endian_selector_for_mode (machine_mode mode)
13384 unsigned int swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
13385 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
13386 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
13387 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
13389 unsigned int *swaparray, i;
13409 gcc_unreachable ();
13412 for (i = 0; i < 16; ++i)
13413 perm[i] = GEN_INT (swaparray[i]);
13415 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
13416 gen_rtvec_v (16, perm)));
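/* Example: for a vector of 4-byte elements such as V4SImode, the
   selector built above is swap4, i.e. the vperm byte permutation
   { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 }, which reverses the
   bytes within each 32-bit element while leaving the element order
   alone.  */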
13420 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
13423 tree arg0 = CALL_EXPR_ARG (exp, 0);
13424 tree arg1 = CALL_EXPR_ARG (exp, 1);
13425 machine_mode tmode = insn_data[icode].operand[0].mode;
13426 machine_mode mode0 = Pmode;
13427 machine_mode mode1 = Pmode;
13428 rtx op0 = expand_normal (arg0);
13429 rtx op1 = expand_normal (arg1);
13431 if (icode == CODE_FOR_nothing)
13432 /* Builtin not supported on this processor. */
13435 /* If we got invalid arguments bail out before generating bad rtl. */
13436 if (arg0 == error_mark_node || arg1 == error_mark_node)
13440 || GET_MODE (target) != tmode
13441 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13442 target = gen_reg_rtx (tmode);
13444 op1 = copy_to_mode_reg (mode1, op1);
13446 /* For LVX, express the RTL accurately by ANDing the address with -16.
13447 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
13448 so the raw address is fine. */
13449 if (icode == CODE_FOR_altivec_lvx_v1ti
13450 || icode == CODE_FOR_altivec_lvx_v2df
13451 || icode == CODE_FOR_altivec_lvx_v2di
13452 || icode == CODE_FOR_altivec_lvx_v4sf
13453 || icode == CODE_FOR_altivec_lvx_v4si
13454 || icode == CODE_FOR_altivec_lvx_v8hi
13455 || icode == CODE_FOR_altivec_lvx_v16qi)
13458 if (op0 == const0_rtx)
13462 op0 = copy_to_mode_reg (mode0, op0);
13463 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
13465 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13466 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
13468 emit_insn (gen_rtx_SET (target, addr));
13472 if (op0 == const0_rtx)
13473 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
13476 op0 = copy_to_mode_reg (mode0, op0);
13477 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
13478 gen_rtx_PLUS (Pmode, op1, op0));
13481 pat = GEN_FCN (icode) (target, addr);
13491 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
13494 tree arg0 = CALL_EXPR_ARG (exp, 0);
13495 tree arg1 = CALL_EXPR_ARG (exp, 1);
13496 tree arg2 = CALL_EXPR_ARG (exp, 2);
13497 rtx op0 = expand_normal (arg0);
13498 rtx op1 = expand_normal (arg1);
13499 rtx op2 = expand_normal (arg2);
13500 machine_mode mode0 = insn_data[icode].operand[0].mode;
13501 machine_mode mode1 = insn_data[icode].operand[1].mode;
13502 machine_mode mode2 = insn_data[icode].operand[2].mode;
13504 if (icode == CODE_FOR_nothing)
13505 /* Builtin not supported on this processor. */
13508 /* If we got invalid arguments bail out before generating bad rtl. */
13509 if (arg0 == error_mark_node
13510 || arg1 == error_mark_node
13511 || arg2 == error_mark_node)
13514 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13515 op0 = copy_to_mode_reg (mode0, op0);
13516 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13517 op1 = copy_to_mode_reg (mode1, op1);
13518 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13519 op2 = copy_to_mode_reg (mode2, op2);
13521 pat = GEN_FCN (icode) (op0, op1, op2);
13529 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
13531 tree arg0 = CALL_EXPR_ARG (exp, 0);
13532 tree arg1 = CALL_EXPR_ARG (exp, 1);
13533 tree arg2 = CALL_EXPR_ARG (exp, 2);
13534 rtx op0 = expand_normal (arg0);
13535 rtx op1 = expand_normal (arg1);
13536 rtx op2 = expand_normal (arg2);
13537 rtx pat, addr, rawaddr;
13538 machine_mode tmode = insn_data[icode].operand[0].mode;
13539 machine_mode smode = insn_data[icode].operand[1].mode;
13540 machine_mode mode1 = Pmode;
13541 machine_mode mode2 = Pmode;
13543 /* Invalid arguments. Bail out before doing anything stupid! */
13544 if (arg0 == error_mark_node
13545 || arg1 == error_mark_node
13546 || arg2 == error_mark_node)
13549 op2 = copy_to_mode_reg (mode2, op2);
13551 /* For STVX, express the RTL accurately by ANDing the address with -16.
13552 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
13553 so the raw address is fine. */
13554 if (icode == CODE_FOR_altivec_stvx_v2df
13555 || icode == CODE_FOR_altivec_stvx_v2di
13556 || icode == CODE_FOR_altivec_stvx_v4sf
13557 || icode == CODE_FOR_altivec_stvx_v4si
13558 || icode == CODE_FOR_altivec_stvx_v8hi
13559 || icode == CODE_FOR_altivec_stvx_v16qi)
13561 if (op1 == const0_rtx)
13565 op1 = copy_to_mode_reg (mode1, op1);
13566 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
13569 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13570 addr = gen_rtx_MEM (tmode, addr);
13572 op0 = copy_to_mode_reg (tmode, op0);
13574 emit_insn (gen_rtx_SET (addr, op0));
13578 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
13579 op0 = copy_to_mode_reg (smode, op0);
13581 if (op1 == const0_rtx)
13582 addr = gen_rtx_MEM (tmode, op2);
13585 op1 = copy_to_mode_reg (mode1, op1);
13586 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
13589 pat = GEN_FCN (icode) (addr, op0);
13597 /* Return the appropriate SPR number associated with the given builtin. */
13598 static inline HOST_WIDE_INT
13599 htm_spr_num (enum rs6000_builtins code)
13601 if (code == HTM_BUILTIN_GET_TFHAR
13602 || code == HTM_BUILTIN_SET_TFHAR)
13604 else if (code == HTM_BUILTIN_GET_TFIAR
13605 || code == HTM_BUILTIN_SET_TFIAR)
13607 else if (code == HTM_BUILTIN_GET_TEXASR
13608 || code == HTM_BUILTIN_SET_TEXASR)
13610 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
13611 || code == HTM_BUILTIN_SET_TEXASRU);
13612 return TEXASRU_SPR;
13615 /* Return the correct ICODE value depending on whether we are
13616 setting or reading the HTM SPRs. */
13617 static inline enum insn_code
13618 rs6000_htm_spr_icode (bool nonvoid)
13621 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
13623 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
13626 /* Expand the HTM builtin in EXP and store the result in TARGET.
13627 Store true in *EXPANDEDP if we found a builtin to expand. */
13629 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
13631 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13632 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
13633 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13634 const struct builtin_description *d;
13639 if (!TARGET_POWERPC64
13640 && (fcode == HTM_BUILTIN_TABORTDC
13641 || fcode == HTM_BUILTIN_TABORTDCI))
13643 size_t uns_fcode = (size_t)fcode;
13644 const char *name = rs6000_builtin_info[uns_fcode].name;
13645 error ("builtin %qs is only valid in 64-bit mode", name);
13649 /* Expand the HTM builtins. */
13651 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
13652 if (d->code == fcode)
13654 rtx op[MAX_HTM_OPERANDS], pat;
13657 call_expr_arg_iterator iter;
13658 unsigned attr = rs6000_builtin_info[fcode].attr;
13659 enum insn_code icode = d->icode;
13660 const struct insn_operand_data *insn_op;
13661 bool uses_spr = (attr & RS6000_BTC_SPR);
13665 icode = rs6000_htm_spr_icode (nonvoid);
13666 insn_op = &insn_data[icode].operand[0];
13670 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
13672 || GET_MODE (target) != tmode
13673 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
13674 target = gen_reg_rtx (tmode);
13676 op[nopnds++] = target;
13679 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
13681 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
13684 insn_op = &insn_data[icode].operand[nopnds];
13686 op[nopnds] = expand_normal (arg);
13688 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
13690 if (!strcmp (insn_op->constraint, "n"))
13692 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
13693 if (!CONST_INT_P (op[nopnds]))
13694 error ("argument %d must be an unsigned literal", arg_num);
13696 error ("argument %d is an unsigned literal that is "
13697 "out of range", arg_num);
13700 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
13706 /* Handle the builtins for extended mnemonics. These accept
13707 no arguments, but map to builtins that take arguments. */
13710 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
13711 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
13712 op[nopnds++] = GEN_INT (1);
13714 attr |= RS6000_BTC_UNARY;
13716 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
13717 op[nopnds++] = GEN_INT (0);
13719 attr |= RS6000_BTC_UNARY;
13725 /* If this builtin accesses SPRs, then pass in the appropriate
13726 SPR number and SPR regno as the last two operands. */
13729 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
13730 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
13732 /* If this builtin accesses a CR, then pass in a scratch
13733 CR as the last operand. */
13734 else if (attr & RS6000_BTC_CR)
13735 { cr = gen_reg_rtx (CCmode);
13741 int expected_nopnds = 0;
13742 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
13743 expected_nopnds = 1;
13744 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
13745 expected_nopnds = 2;
13746 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
13747 expected_nopnds = 3;
13748 if (!(attr & RS6000_BTC_VOID))
13749 expected_nopnds += 1;
13751 expected_nopnds += 1;
13753 gcc_assert (nopnds == expected_nopnds
13754 && nopnds <= MAX_HTM_OPERANDS);
13760 pat = GEN_FCN (icode) (op[0]);
13763 pat = GEN_FCN (icode) (op[0], op[1]);
13766 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
13769 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
13772 gcc_unreachable ();
13778 if (attr & RS6000_BTC_CR)
13780 if (fcode == HTM_BUILTIN_TBEGIN)
13782 /* Emit code to set TARGET to true or false depending on
13783 whether the tbegin. instruction succeeded or failed
13784 to start a transaction. We do this by placing the 1's
13785 complement of CR's EQ bit into TARGET. */
13786 rtx scratch = gen_reg_rtx (SImode);
13787 emit_insn (gen_rtx_SET (scratch,
13788 gen_rtx_EQ (SImode, cr,
13790 emit_insn (gen_rtx_SET (target,
13791 gen_rtx_XOR (SImode, scratch,
13796 /* Emit code to copy the 4-bit condition register field
13797 CR into the least significant end of register TARGET. */
13798 rtx scratch1 = gen_reg_rtx (SImode);
13799 rtx scratch2 = gen_reg_rtx (SImode);
13800 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
13801 emit_insn (gen_movcc (subreg, cr));
13802 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
13803 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
13812 *expandedp = false;
13816 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
13819 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
13822 /* __builtin_cpu_init () is a nop, so expand to nothing. */
13823 if (fcode == RS6000_BUILTIN_CPU_INIT)
13826 if (target == 0 || GET_MODE (target) != SImode)
13827 target = gen_reg_rtx (SImode);
13829 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
13830 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
13831 /* Target clones create an ARRAY_REF instead of a STRING_CST; convert it back
13832 to a STRING_CST. */
13833 if (TREE_CODE (arg) == ARRAY_REF
13834 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
13835 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
13836 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
13837 arg = TREE_OPERAND (arg, 0);
13839 if (TREE_CODE (arg) != STRING_CST)
13841 error ("builtin %qs only accepts a string argument",
13842 rs6000_builtin_info[(size_t) fcode].name);
13846 if (fcode == RS6000_BUILTIN_CPU_IS)
13848 const char *cpu = TREE_STRING_POINTER (arg);
13849 rtx cpuid = NULL_RTX;
13850 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
13851 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
13853 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
13854 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
13857 if (cpuid == NULL_RTX)
13859 /* Invalid CPU argument. */
13860 error ("cpu %qs is an invalid argument to builtin %qs",
13861 cpu, rs6000_builtin_info[(size_t) fcode].name);
13865 rtx platform = gen_reg_rtx (SImode);
13866 rtx tcbmem = gen_const_mem (SImode,
13867 gen_rtx_PLUS (Pmode,
13868 gen_rtx_REG (Pmode, TLS_REGNUM),
13869 GEN_INT (TCB_PLATFORM_OFFSET)));
13870 emit_move_insn (platform, tcbmem);
13871 emit_insn (gen_eqsi3 (target, platform, cpuid));
13873 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
13875 const char *hwcap = TREE_STRING_POINTER (arg);
13876 rtx mask = NULL_RTX;
13878 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
13879 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
13881 mask = GEN_INT (cpu_supports_info[i].mask);
13882 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
13885 if (mask == NULL_RTX)
13887 /* Invalid HWCAP argument. */
13888 error ("%s %qs is an invalid argument to builtin %qs",
13889 "hwcap", hwcap, rs6000_builtin_info[(size_t) fcode].name);
13893 rtx tcb_hwcap = gen_reg_rtx (SImode);
13894 rtx tcbmem = gen_const_mem (SImode,
13895 gen_rtx_PLUS (Pmode,
13896 gen_rtx_REG (Pmode, TLS_REGNUM),
13897 GEN_INT (hwcap_offset)));
13898 emit_move_insn (tcb_hwcap, tcbmem);
13899 rtx scratch1 = gen_reg_rtx (SImode);
13900 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
13901 rtx scratch2 = gen_reg_rtx (SImode);
13902 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
13903 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
13906 gcc_unreachable ();
13908 /* Record that we have expanded a CPU builtin, so that we can later
13909 emit a reference to the special symbol exported by LIBC to ensure we
13910 do not link against an old LIBC that doesn't support this feature. */
13911 cpu_builtin_p = true;
13914 warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware "
13915 "capability bits", rs6000_builtin_info[(size_t) fcode].name);
13917 /* For old LIBCs, always return FALSE. */
13918 emit_move_insn (target, GEN_INT (0));
13919 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
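/* User-level view of the two builtins expanded above (illustrative):

       if (__builtin_cpu_is ("power9"))
         use_power9_path ();
       if (__builtin_cpu_supports ("vsx"))
         use_vsx_path ();

   Both compile to a TCB load plus a compare when glibc 2.23 or newer
   provides the fields; on older libcs the fallback branch above warns
   and folds the result to 0.  */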
13925 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
13928 tree arg0 = CALL_EXPR_ARG (exp, 0);
13929 tree arg1 = CALL_EXPR_ARG (exp, 1);
13930 tree arg2 = CALL_EXPR_ARG (exp, 2);
13931 rtx op0 = expand_normal (arg0);
13932 rtx op1 = expand_normal (arg1);
13933 rtx op2 = expand_normal (arg2);
13934 machine_mode tmode = insn_data[icode].operand[0].mode;
13935 machine_mode mode0 = insn_data[icode].operand[1].mode;
13936 machine_mode mode1 = insn_data[icode].operand[2].mode;
13937 machine_mode mode2 = insn_data[icode].operand[3].mode;
13939 if (icode == CODE_FOR_nothing)
13940 /* Builtin not supported on this processor. */
13943 /* If we got invalid arguments bail out before generating bad rtl. */
13944 if (arg0 == error_mark_node
13945 || arg1 == error_mark_node
13946 || arg2 == error_mark_node)
13949 /* Check and prepare argument depending on the instruction code.
13951 Note that a switch statement instead of the sequence of tests
13952 would be incorrect as many of the CODE_FOR values could be
13953 CODE_FOR_nothing and that would yield multiple alternatives
13954 with identical values. We'd never reach here at runtime in that case. */
13956 if (icode == CODE_FOR_altivec_vsldoi_v4sf
13957 || icode == CODE_FOR_altivec_vsldoi_v2df
13958 || icode == CODE_FOR_altivec_vsldoi_v4si
13959 || icode == CODE_FOR_altivec_vsldoi_v8hi
13960 || icode == CODE_FOR_altivec_vsldoi_v16qi)
13962 /* Only allow 4-bit unsigned literals. */
13964 if (TREE_CODE (arg2) != INTEGER_CST
13965 || TREE_INT_CST_LOW (arg2) & ~0xf)
13967 error ("argument 3 must be a 4-bit unsigned literal");
13968 return CONST0_RTX (tmode);
13971 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
13972 || icode == CODE_FOR_vsx_xxpermdi_v2di
13973 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
13974 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
13975 || icode == CODE_FOR_vsx_xxpermdi_v1ti
13976 || icode == CODE_FOR_vsx_xxpermdi_v4sf
13977 || icode == CODE_FOR_vsx_xxpermdi_v4si
13978 || icode == CODE_FOR_vsx_xxpermdi_v8hi
13979 || icode == CODE_FOR_vsx_xxpermdi_v16qi
13980 || icode == CODE_FOR_vsx_xxsldwi_v16qi
13981 || icode == CODE_FOR_vsx_xxsldwi_v8hi
13982 || icode == CODE_FOR_vsx_xxsldwi_v4si
13983 || icode == CODE_FOR_vsx_xxsldwi_v4sf
13984 || icode == CODE_FOR_vsx_xxsldwi_v2di
13985 || icode == CODE_FOR_vsx_xxsldwi_v2df)
13987 /* Only allow 2-bit unsigned literals. */
13989 if (TREE_CODE (arg2) != INTEGER_CST
13990 || TREE_INT_CST_LOW (arg2) & ~0x3)
13992 error ("argument 3 must be a 2-bit unsigned literal");
13993 return CONST0_RTX (tmode);
13996 else if (icode == CODE_FOR_vsx_set_v2df
13997 || icode == CODE_FOR_vsx_set_v2di
13998 || icode == CODE_FOR_bcdadd
13999 || icode == CODE_FOR_bcdadd_lt
14000 || icode == CODE_FOR_bcdadd_eq
14001 || icode == CODE_FOR_bcdadd_gt
14002 || icode == CODE_FOR_bcdsub
14003 || icode == CODE_FOR_bcdsub_lt
14004 || icode == CODE_FOR_bcdsub_eq
14005 || icode == CODE_FOR_bcdsub_gt)
14007 /* Only allow 1-bit unsigned literals. */
14009 if (TREE_CODE (arg2) != INTEGER_CST
14010 || TREE_INT_CST_LOW (arg2) & ~0x1)
14012 error ("argument 3 must be a 1-bit unsigned literal");
14013 return CONST0_RTX (tmode);
14016 else if (icode == CODE_FOR_dfp_ddedpd_dd
14017 || icode == CODE_FOR_dfp_ddedpd_td)
14019 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
14021 if (TREE_CODE (arg0) != INTEGER_CST
14022 || TREE_INT_CST_LOW (arg0) & ~0x3)
14024 error ("argument 1 must be 0 or 2");
14025 return CONST0_RTX (tmode);
14028 else if (icode == CODE_FOR_dfp_denbcd_dd
14029 || icode == CODE_FOR_dfp_denbcd_td)
14031 /* Only allow 1-bit unsigned literals. */
14033 if (TREE_CODE (arg0) != INTEGER_CST
14034 || TREE_INT_CST_LOW (arg0) & ~0x1)
14036 error ("argument 1 must be a 1-bit unsigned literal");
14037 return CONST0_RTX (tmode);
14040 else if (icode == CODE_FOR_dfp_dscli_dd
14041 || icode == CODE_FOR_dfp_dscli_td
14042 || icode == CODE_FOR_dfp_dscri_dd
14043 || icode == CODE_FOR_dfp_dscri_td)
14045 /* Only allow 6-bit unsigned literals. */
14047 if (TREE_CODE (arg1) != INTEGER_CST
14048 || TREE_INT_CST_LOW (arg1) & ~0x3f)
14050 error ("argument 2 must be a 6-bit unsigned literal");
14051 return CONST0_RTX (tmode);
14054 else if (icode == CODE_FOR_crypto_vshasigmaw
14055 || icode == CODE_FOR_crypto_vshasigmad)
14057 /* Check whether the 2nd and 3rd arguments are integer constants and in
14058 range and prepare arguments. */
14060 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2))
14062 error ("argument 2 must be 0 or 1");
14063 return CONST0_RTX (tmode);
14067 if (TREE_CODE (arg2) != INTEGER_CST
14068 || wi::geu_p (wi::to_wide (arg2), 16))
14070 error ("argument 3 must be in the range [0, 15]");
14071 return CONST0_RTX (tmode);
14076 || GET_MODE (target) != tmode
14077 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14078 target = gen_reg_rtx (tmode);
14080 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14081 op0 = copy_to_mode_reg (mode0, op0);
14082 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14083 op1 = copy_to_mode_reg (mode1, op1);
14084 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14085 op2 = copy_to_mode_reg (mode2, op2);
14087 pat = GEN_FCN (icode) (target, op0, op1, op2);
14096 /* Expand the dst builtins. */
14098 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
14101 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14102 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14103 tree arg0, arg1, arg2;
14104 machine_mode mode0, mode1;
14105 rtx pat, op0, op1, op2;
14106 const struct builtin_description *d;
14109 *expandedp = false;
14111 /* Handle DST variants. */
14113 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14114 if (d->code == fcode)
14116 arg0 = CALL_EXPR_ARG (exp, 0);
14117 arg1 = CALL_EXPR_ARG (exp, 1);
14118 arg2 = CALL_EXPR_ARG (exp, 2);
14119 op0 = expand_normal (arg0);
14120 op1 = expand_normal (arg1);
14121 op2 = expand_normal (arg2);
14122 mode0 = insn_data[d->icode].operand[0].mode;
14123 mode1 = insn_data[d->icode].operand[1].mode;
14125 /* Invalid arguments, bail out before generating bad rtl. */
14126 if (arg0 == error_mark_node
14127 || arg1 == error_mark_node
14128 || arg2 == error_mark_node)
14133 if (TREE_CODE (arg2) != INTEGER_CST
14134 || TREE_INT_CST_LOW (arg2) & ~0x3)
14136 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
14140 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14141 op0 = copy_to_mode_reg (Pmode, op0);
14142 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14143 op1 = copy_to_mode_reg (mode1, op1);
14145 pat = GEN_FCN (d->icode) (op0, op1, op2);
14155 /* Expand vec_init builtin. */
14157 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
14159 machine_mode tmode = TYPE_MODE (type);
14160 machine_mode inner_mode = GET_MODE_INNER (tmode);
14161 int i, n_elt = GET_MODE_NUNITS (tmode);
14163 gcc_assert (VECTOR_MODE_P (tmode));
14164 gcc_assert (n_elt == call_expr_nargs (exp));
14166 if (!target || !register_operand (target, tmode))
14167 target = gen_reg_rtx (tmode);
14169 /* If we have a vector comprised of a single element, such as V1TImode, do
14170 the initialization directly. */
14171 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
14173 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
14174 emit_move_insn (target, gen_lowpart (tmode, x));
14178 rtvec v = rtvec_alloc (n_elt);
14180 for (i = 0; i < n_elt; ++i)
14182 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
14183 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14186 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
14192 /* Return the integer constant in ARG. Constrain it to be in the range
14193 of the subparts of VEC_TYPE; issue an error if not. */
14196 get_element_number (tree vec_type, tree arg)
14198 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14200 if (!tree_fits_uhwi_p (arg)
14201 || (elt = tree_to_uhwi (arg), elt > max))
14203 error ("selector must be an integer constant in the range [0, %wi]", max);
14210 /* Expand vec_set builtin. */
14212 altivec_expand_vec_set_builtin (tree exp)
14214 machine_mode tmode, mode1;
14215 tree arg0, arg1, arg2;
14219 arg0 = CALL_EXPR_ARG (exp, 0);
14220 arg1 = CALL_EXPR_ARG (exp, 1);
14221 arg2 = CALL_EXPR_ARG (exp, 2);
14223 tmode = TYPE_MODE (TREE_TYPE (arg0));
14224 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14225 gcc_assert (VECTOR_MODE_P (tmode));
14227 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
14228 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
14229 elt = get_element_number (TREE_TYPE (arg0), arg2);
14231 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14232 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14234 op0 = force_reg (tmode, op0);
14235 op1 = force_reg (mode1, op1);
14237 rs6000_expand_vector_set (op0, op1, elt);
14242 /* Expand vec_ext builtin. */
14244 altivec_expand_vec_ext_builtin (tree exp, rtx target)
14246 machine_mode tmode, mode0;
14251 arg0 = CALL_EXPR_ARG (exp, 0);
14252 arg1 = CALL_EXPR_ARG (exp, 1);
14254 op0 = expand_normal (arg0);
14255 op1 = expand_normal (arg1);
14257 if (TREE_CODE (arg1) == INTEGER_CST)
14259 unsigned HOST_WIDE_INT elt;
14260 unsigned HOST_WIDE_INT size = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
14261 unsigned int truncated_selector;
14262 /* Even if !tree_fits_uhwi_p (arg1), TREE_INT_CST_LOW (arg1)
14263 returns the low-order bits of the INTEGER_CST for modulo indexing. */
14264 elt = TREE_INT_CST_LOW (arg1);
14265 truncated_selector = elt % size;
14266 op1 = GEN_INT (truncated_selector);
14269 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14270 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14271 gcc_assert (VECTOR_MODE_P (mode0));
14273 op0 = force_reg (mode0, op0);
14275 if (optimize || !target || !register_operand (target, tmode))
14276 target = gen_reg_rtx (tmode);
14278 rs6000_expand_vector_extract (target, op0, op1);
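  /* Worked example of the modulo wrap just above: for a V4SImode
     vector, size = 4, so vec_extract (v, 7) selects element
     7 % 4 == 3, i.e. it behaves exactly like vec_extract (v, 3).  */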
14283 /* Expand the builtin in EXP and store the result in TARGET. Store
14284 true in *EXPANDEDP if we found a builtin to expand. */
14286 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
14288 const struct builtin_description *d;
14290 enum insn_code icode;
14291 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14292 tree arg0, arg1, arg2;
14294 machine_mode tmode, mode0;
14295 enum rs6000_builtins fcode
14296 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14298 if (rs6000_overloaded_builtin_p (fcode))
14301 error ("unresolved overload for Altivec builtin %qF", fndecl);
14303 /* Given it is invalid, just generate a normal call. */
14304 return expand_call (exp, target, false);
14307 target = altivec_expand_dst_builtin (exp, target, expandedp);
14315 case ALTIVEC_BUILTIN_STVX_V2DF:
14316 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
14317 case ALTIVEC_BUILTIN_STVX_V2DI:
14318 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
14319 case ALTIVEC_BUILTIN_STVX_V4SF:
14320 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
14321 case ALTIVEC_BUILTIN_STVX:
14322 case ALTIVEC_BUILTIN_STVX_V4SI:
14323 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
14324 case ALTIVEC_BUILTIN_STVX_V8HI:
14325 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
14326 case ALTIVEC_BUILTIN_STVX_V16QI:
14327 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
14328 case ALTIVEC_BUILTIN_STVEBX:
14329 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
14330 case ALTIVEC_BUILTIN_STVEHX:
14331 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
14332 case ALTIVEC_BUILTIN_STVEWX:
14333 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
14334 case ALTIVEC_BUILTIN_STVXL_V2DF:
14335 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
14336 case ALTIVEC_BUILTIN_STVXL_V2DI:
14337 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
14338 case ALTIVEC_BUILTIN_STVXL_V4SF:
14339 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
14340 case ALTIVEC_BUILTIN_STVXL:
14341 case ALTIVEC_BUILTIN_STVXL_V4SI:
14342 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
14343 case ALTIVEC_BUILTIN_STVXL_V8HI:
14344 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
14345 case ALTIVEC_BUILTIN_STVXL_V16QI:
14346 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
14348 case ALTIVEC_BUILTIN_STVLX:
14349 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
14350 case ALTIVEC_BUILTIN_STVLXL:
14351 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
14352 case ALTIVEC_BUILTIN_STVRX:
14353 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
14354 case ALTIVEC_BUILTIN_STVRXL:
14355 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
14357 case P9V_BUILTIN_STXVL:
14358 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
14360 case P9V_BUILTIN_XST_LEN_R:
14361 return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp);
14363 case VSX_BUILTIN_STXVD2X_V1TI:
14364 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
14365 case VSX_BUILTIN_STXVD2X_V2DF:
14366 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
14367 case VSX_BUILTIN_STXVD2X_V2DI:
14368 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
14369 case VSX_BUILTIN_STXVW4X_V4SF:
14370 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
14371 case VSX_BUILTIN_STXVW4X_V4SI:
14372 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
14373 case VSX_BUILTIN_STXVW4X_V8HI:
14374 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
14375 case VSX_BUILTIN_STXVW4X_V16QI:
14376 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
14378 /* For the following on big-endian targets, it's ok to use any appropriate
14379 unaligned-supporting store, so use a generic expander. For
14380 little-endian, the exact element-reversing instruction must be used. */
14382 case VSX_BUILTIN_ST_ELEMREV_V1TI:
14384 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
14385 : CODE_FOR_vsx_st_elemrev_v1ti);
14386 return altivec_expand_stv_builtin (code, exp);
14388 case VSX_BUILTIN_ST_ELEMREV_V2DF:
14390 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
14391 : CODE_FOR_vsx_st_elemrev_v2df);
14392 return altivec_expand_stv_builtin (code, exp);
14394 case VSX_BUILTIN_ST_ELEMREV_V2DI:
14396 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
14397 : CODE_FOR_vsx_st_elemrev_v2di);
14398 return altivec_expand_stv_builtin (code, exp);
14400 case VSX_BUILTIN_ST_ELEMREV_V4SF:
14402 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
14403 : CODE_FOR_vsx_st_elemrev_v4sf);
14404 return altivec_expand_stv_builtin (code, exp);
14406 case VSX_BUILTIN_ST_ELEMREV_V4SI:
14408 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
14409 : CODE_FOR_vsx_st_elemrev_v4si);
14410 return altivec_expand_stv_builtin (code, exp);
14412 case VSX_BUILTIN_ST_ELEMREV_V8HI:
14414 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
14415 : CODE_FOR_vsx_st_elemrev_v8hi);
14416 return altivec_expand_stv_builtin (code, exp);
14418 case VSX_BUILTIN_ST_ELEMREV_V16QI:
14420 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
14421 : CODE_FOR_vsx_st_elemrev_v16qi);
14422 return altivec_expand_stv_builtin (code, exp);
14425 case ALTIVEC_BUILTIN_MFVSCR:
14426 icode = CODE_FOR_altivec_mfvscr;
14427 tmode = insn_data[icode].operand[0].mode;
14430 || GET_MODE (target) != tmode
14431 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14432 target = gen_reg_rtx (tmode);
14434 pat = GEN_FCN (icode) (target);
14440 case ALTIVEC_BUILTIN_MTVSCR:
14441 icode = CODE_FOR_altivec_mtvscr;
14442 arg0 = CALL_EXPR_ARG (exp, 0);
14443 op0 = expand_normal (arg0);
14444 mode0 = insn_data[icode].operand[0].mode;
14446 /* If we got invalid arguments, bail out before generating bad rtl. */
14447 if (arg0 == error_mark_node)
14450 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14451 op0 = copy_to_mode_reg (mode0, op0);
14453 pat = GEN_FCN (icode) (op0);
14458 case ALTIVEC_BUILTIN_DSSALL:
14459 emit_insn (gen_altivec_dssall ());
14462 case ALTIVEC_BUILTIN_DSS:
14463 icode = CODE_FOR_altivec_dss;
14464 arg0 = CALL_EXPR_ARG (exp, 0);
14466 op0 = expand_normal (arg0);
14467 mode0 = insn_data[icode].operand[0].mode;
14469 /* If we got invalid arguments, bail out before generating bad rtl. */
14470 if (arg0 == error_mark_node)
14473 if (TREE_CODE (arg0) != INTEGER_CST
14474 || TREE_INT_CST_LOW (arg0) & ~0x3)
14476 error ("argument to %qs must be a 2-bit unsigned literal", "dss");
14480 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14481 op0 = copy_to_mode_reg (mode0, op0);
14483 emit_insn (gen_altivec_dss (op0));
14486 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
14487 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
14488 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
14489 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
14490 case VSX_BUILTIN_VEC_INIT_V2DF:
14491 case VSX_BUILTIN_VEC_INIT_V2DI:
14492 case VSX_BUILTIN_VEC_INIT_V1TI:
14493 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
14495 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
14496 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
14497 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
14498 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
14499 case VSX_BUILTIN_VEC_SET_V2DF:
14500 case VSX_BUILTIN_VEC_SET_V2DI:
14501 case VSX_BUILTIN_VEC_SET_V1TI:
14502 return altivec_expand_vec_set_builtin (exp);
14504 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
14505 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
14506 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
14507 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
14508 case VSX_BUILTIN_VEC_EXT_V2DF:
14509 case VSX_BUILTIN_VEC_EXT_V2DI:
14510 case VSX_BUILTIN_VEC_EXT_V1TI:
14511 return altivec_expand_vec_ext_builtin (exp, target);
14513 case P9V_BUILTIN_VEC_EXTRACT4B:
14514 arg1 = CALL_EXPR_ARG (exp, 1);
14517 /* Generate a normal call if it is invalid. */
14518 if (arg1 == error_mark_node)
14519 return expand_call (exp, target, false);
14521 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
14523 error ("second argument to %qs must be [0, 12]", "vec_vextract4b");
14524 return expand_call (exp, target, false);
14528 case P9V_BUILTIN_VEC_INSERT4B:
14529 arg2 = CALL_EXPR_ARG (exp, 2);
14532 /* Generate a normal call if it is invalid. */
14533 if (arg2 == error_mark_node)
14534 return expand_call (exp, target, false);
14536 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
14538 error ("third argument to %qs must be [0, 12]", "vec_vinsert4b");
14539 return expand_call (exp, target, false);
14545 /* Fall through. */
14548 /* Expand abs* operations. */
14550 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14551 if (d->code == fcode)
14552 return altivec_expand_abs_builtin (d->icode, exp, target);
14554 /* Expand the AltiVec predicates. */
14555 d = bdesc_altivec_preds;
14556 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14557 if (d->code == fcode)
14558 return altivec_expand_predicate_builtin (d->icode, exp, target);
14560 /* LV* are funky. We initialized them differently. */
14563 case ALTIVEC_BUILTIN_LVSL:
14564 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
14565 exp, target, false);
14566 case ALTIVEC_BUILTIN_LVSR:
14567 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
14568 exp, target, false);
14569 case ALTIVEC_BUILTIN_LVEBX:
14570 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
14571 exp, target, false);
14572 case ALTIVEC_BUILTIN_LVEHX:
14573 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
14574 exp, target, false);
14575 case ALTIVEC_BUILTIN_LVEWX:
14576 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
14577 exp, target, false);
14578 case ALTIVEC_BUILTIN_LVXL_V2DF:
14579 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
14580 exp, target, false);
14581 case ALTIVEC_BUILTIN_LVXL_V2DI:
14582 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
14583 exp, target, false);
14584 case ALTIVEC_BUILTIN_LVXL_V4SF:
14585 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
14586 exp, target, false);
14587 case ALTIVEC_BUILTIN_LVXL:
14588 case ALTIVEC_BUILTIN_LVXL_V4SI:
14589 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
14590 exp, target, false);
14591 case ALTIVEC_BUILTIN_LVXL_V8HI:
14592 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
14593 exp, target, false);
14594 case ALTIVEC_BUILTIN_LVXL_V16QI:
14595 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
14596 exp, target, false);
14597 case ALTIVEC_BUILTIN_LVX_V1TI:
14598 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v1ti,
14599 exp, target, false);
14600 case ALTIVEC_BUILTIN_LVX_V2DF:
14601 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
14602 exp, target, false);
14603 case ALTIVEC_BUILTIN_LVX_V2DI:
14604 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
14605 exp, target, false);
14606 case ALTIVEC_BUILTIN_LVX_V4SF:
14607 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
14608 exp, target, false);
14609 case ALTIVEC_BUILTIN_LVX:
14610 case ALTIVEC_BUILTIN_LVX_V4SI:
14611 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
14612 exp, target, false);
14613 case ALTIVEC_BUILTIN_LVX_V8HI:
14614 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
14615 exp, target, false);
14616 case ALTIVEC_BUILTIN_LVX_V16QI:
14617 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
14618 exp, target, false);
14619 case ALTIVEC_BUILTIN_LVLX:
14620 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
14621 exp, target, true);
14622 case ALTIVEC_BUILTIN_LVLXL:
14623 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
14624 exp, target, true);
14625 case ALTIVEC_BUILTIN_LVRX:
14626 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
14627 exp, target, true);
14628 case ALTIVEC_BUILTIN_LVRXL:
14629 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
14630 exp, target, true);
14631 case VSX_BUILTIN_LXVD2X_V1TI:
14632 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
14633 exp, target, false);
14634 case VSX_BUILTIN_LXVD2X_V2DF:
14635 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
14636 exp, target, false);
14637 case VSX_BUILTIN_LXVD2X_V2DI:
14638 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
14639 exp, target, false);
14640 case VSX_BUILTIN_LXVW4X_V4SF:
14641 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
14642 exp, target, false);
14643 case VSX_BUILTIN_LXVW4X_V4SI:
14644 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
14645 exp, target, false);
14646 case VSX_BUILTIN_LXVW4X_V8HI:
14647 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
14648 exp, target, false);
14649 case VSX_BUILTIN_LXVW4X_V16QI:
14650 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
14651 exp, target, false);
14652 /* For the following on big-endian targets, it's ok to use any appropriate
14653 unaligned-supporting load, so use a generic expander. For
14654 little-endian, the exact element-reversing instruction must be used. */
14656 case VSX_BUILTIN_LD_ELEMREV_V2DF:
14658 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
14659 : CODE_FOR_vsx_ld_elemrev_v2df);
14660 return altivec_expand_lv_builtin (code, exp, target, false);
14662 case VSX_BUILTIN_LD_ELEMREV_V1TI:
14664 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
14665 : CODE_FOR_vsx_ld_elemrev_v1ti);
14666 return altivec_expand_lv_builtin (code, exp, target, false);
14668 case VSX_BUILTIN_LD_ELEMREV_V2DI:
14670 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
14671 : CODE_FOR_vsx_ld_elemrev_v2di);
14672 return altivec_expand_lv_builtin (code, exp, target, false);
14674 case VSX_BUILTIN_LD_ELEMREV_V4SF:
14676 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
14677 : CODE_FOR_vsx_ld_elemrev_v4sf);
14678 return altivec_expand_lv_builtin (code, exp, target, false);
14680 case VSX_BUILTIN_LD_ELEMREV_V4SI:
14682 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
14683 : CODE_FOR_vsx_ld_elemrev_v4si);
14684 return altivec_expand_lv_builtin (code, exp, target, false);
14686 case VSX_BUILTIN_LD_ELEMREV_V8HI:
14688 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
14689 : CODE_FOR_vsx_ld_elemrev_v8hi);
14690 return altivec_expand_lv_builtin (code, exp, target, false);
14692 case VSX_BUILTIN_LD_ELEMREV_V16QI:
14694 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
14695 : CODE_FOR_vsx_ld_elemrev_v16qi);
14696 return altivec_expand_lv_builtin (code, exp, target, false);
14701 /* Fall through. */
14704 *expandedp = false;
14708 /* Check whether a builtin function is supported in this target configuration. */
14711 rs6000_builtin_is_supported_p (enum rs6000_builtins fncode)
14713 HOST_WIDE_INT fnmask = rs6000_builtin_info[fncode].mask;
14714 if ((fnmask & rs6000_builtin_mask) != fnmask)
14720 /* Raise an error message for a builtin function that is called without the
14721 appropriate target options being set. */
14724 rs6000_invalid_builtin (enum rs6000_builtins fncode)
14726 size_t uns_fncode = (size_t) fncode;
14727 const char *name = rs6000_builtin_info[uns_fncode].name;
14728 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
14730 gcc_assert (name != NULL);
14731 if ((fnmask & RS6000_BTM_CELL) != 0)
14732 error ("builtin function %qs is only valid for the cell processor", name);
14733 else if ((fnmask & RS6000_BTM_VSX) != 0)
14734 error ("builtin function %qs requires the %qs option", name, "-mvsx");
14735 else if ((fnmask & RS6000_BTM_HTM) != 0)
14736 error ("builtin function %qs requires the %qs option", name, "-mhtm");
14737 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
14738 error ("builtin function %qs requires the %qs option", name, "-maltivec");
14739 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14740 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14741 error ("builtin function %qs requires the %qs and %qs options",
14742 name, "-mhard-dfp", "-mpower8-vector");
14743 else if ((fnmask & RS6000_BTM_DFP) != 0)
14744 error ("builtin function %qs requires the %qs option", name, "-mhard-dfp");
14745 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
14746 error ("builtin function %qs requires the %qs option", name,
14747 "-mpower8-vector");
14748 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
14749 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
14750 error ("builtin function %qs requires the %qs and %qs options",
14751 name, "-mcpu=power9", "-m64");
14752 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
14753 error ("builtin function %qs requires the %qs option", name,
14755 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
14756 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
14757 error ("builtin function %qs requires the %qs and %qs options",
14758 name, "-mcpu=power9", "-m64");
14759 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
14760 error ("builtin function %qs requires the %qs option", name,
14762 else if ((fnmask & RS6000_BTM_LDBL128) == RS6000_BTM_LDBL128)
14764 if (!TARGET_HARD_FLOAT)
14765 error ("builtin function %qs requires the %qs option", name,
14768 error ("builtin function %qs requires the %qs option", name,
14769 TARGET_IEEEQUAD ? "-mabi=ibmlongdouble" : "-mlong-double-128");
14771 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
14772 error ("builtin function %qs requires the %qs option", name,
14774 else if ((fnmask & RS6000_BTM_FLOAT128_HW) != 0)
14775 error ("builtin function %qs requires ISA 3.0 IEEE 128-bit floating point",
14777 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
14778 error ("builtin function %qs requires the %qs option", name,
14780 else if ((fnmask & (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
14781 == (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
14782 error ("builtin function %qs requires the %qs (or newer), and "
14783 "%qs or %qs options",
14784 name, "-mcpu=power7", "-m64", "-mpowerpc64");
14786 error ("builtin function %qs is not supported with the current options",
14790 /* Target hook for early folding of built-ins, shamelessly stolen from ia64.c. */
14794 rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
14795 int n_args ATTRIBUTE_UNUSED,
14796 tree *args ATTRIBUTE_UNUSED,
14797 bool ignore ATTRIBUTE_UNUSED)
14799 #ifdef SUBTARGET_FOLD_BUILTIN
14800 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
14806 /* Helper function to sort out which built-ins may be valid without having a LHS. */
14809 rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
14813 case ALTIVEC_BUILTIN_STVX_V16QI:
14814 case ALTIVEC_BUILTIN_STVX_V8HI:
14815 case ALTIVEC_BUILTIN_STVX_V4SI:
14816 case ALTIVEC_BUILTIN_STVX_V4SF:
14817 case ALTIVEC_BUILTIN_STVX_V2DI:
14818 case ALTIVEC_BUILTIN_STVX_V2DF:
14819 case VSX_BUILTIN_STXVW4X_V16QI:
14820 case VSX_BUILTIN_STXVW4X_V8HI:
14821 case VSX_BUILTIN_STXVW4X_V4SF:
14822 case VSX_BUILTIN_STXVW4X_V4SI:
14823 case VSX_BUILTIN_STXVD2X_V2DF:
14824 case VSX_BUILTIN_STXVD2X_V2DI:
14831 /* Helper function to handle the gimple folding of a vector compare
14832 operation. This sets up true/false vectors, and uses the
14833 VEC_COND_EXPR operation.
14834 CODE indicates which comparison is to be made (EQ, GT, ...).
14835 TYPE indicates the type of the result. */
14837 fold_build_vec_cmp (tree_code code, tree type,
14838 tree arg0, tree arg1)
14840 tree cmp_type = build_same_sized_truth_vector_type (type);
14841 tree zero_vec = build_zero_cst (type);
14842 tree minus_one_vec = build_minus_one_cst (type);
14843 tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
14844 return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
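/* Sketch (annotation) of the folded form this helper produces, e.g. for
   a V4SI equality compare, with types elided:
     cmp = arg0 == arg1;
     lhs = VEC_COND_EXPR (cmp, {-1,-1,-1,-1}, {0,0,0,0});
   Later passes can collapse this into a single vector-compare insn.  */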
14847 /* Helper function to handle the in-between steps for the
14848 vector compare built-ins. */
14850 fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
14852 tree arg0 = gimple_call_arg (stmt, 0);
14853 tree arg1 = gimple_call_arg (stmt, 1);
14854 tree lhs = gimple_call_lhs (stmt);
14855 tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1);
14856 gimple *g = gimple_build_assign (lhs, cmp);
14857 gimple_set_location (g, gimple_location (stmt));
14858 gsi_replace (gsi, g, true);
14861 /* Helper function to map V2DF and V4SF types to their
14862 integral equivalents (V2DI and V4SI). */
14863 tree map_to_integral_tree_type (tree input_tree_type)
14865 if (INTEGRAL_TYPE_P (TREE_TYPE (input_tree_type)))
14866 return input_tree_type;
14869 if (types_compatible_p (TREE_TYPE (input_tree_type),
14870 TREE_TYPE (V2DF_type_node)))
14871 return V2DI_type_node;
14872 else if (types_compatible_p (TREE_TYPE (input_tree_type),
14873 TREE_TYPE (V4SF_type_node)))
14874 return V4SI_type_node;
14876 gcc_unreachable ();
14880 /* Helper function to handle the vector merge[hl] built-ins. The
14881 implementation difference between the h and l versions is in the
14882 values used when building the permute vector for the high-word versus
14883 low-word merge. The variance is keyed off the use_high parameter. */
14885 fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
14887 tree arg0 = gimple_call_arg (stmt, 0);
14888 tree arg1 = gimple_call_arg (stmt, 1);
14889 tree lhs = gimple_call_lhs (stmt);
14890 tree lhs_type = TREE_TYPE (lhs);
14891 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
14892 int midpoint = n_elts / 2;
14898 /* The permute_type will match the lhs for integral types. For double and
14899 float types, the permute type needs to map to the V2 or V4 type that matches the size. */
14902 permute_type = map_to_integral_tree_type (lhs_type);
14903 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
14905 for (int i = 0; i < midpoint; i++)
14907 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14909 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14910 offset + n_elts + i));
14913 tree permute = elts.build ();
14915 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
14916 gimple_set_location (g, gimple_location (stmt));
14917 gsi_replace (gsi, g, true);
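/* Worked example (annotation): for a V4SI merge, n_elts == 4 and
   midpoint == 2.  vec_mergeh (use_high == 0) builds the permute vector
   {0, 4, 1, 5}; vec_mergel (use_high == 1) starts at the midpoint and
   builds {2, 6, 3, 7}.  */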
14920 /* Helper function to handle the vector merge[eo] built-ins. */
14922 fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_odd)
14924 tree arg0 = gimple_call_arg (stmt, 0);
14925 tree arg1 = gimple_call_arg (stmt, 1);
14926 tree lhs = gimple_call_lhs (stmt);
14927 tree lhs_type = TREE_TYPE (lhs);
14928 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
14930 /* The permute_type will match the lhs for integral types. For double and
14931 float types, the permute type needs to map to the V2 or V4 type that matches the size. */
14934 permute_type = map_to_integral_tree_type (lhs_type);
14936 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
14938 /* Build the permute vector. */
14939 for (int i = 0; i < n_elts / 2; i++)
14941 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14943 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14944 2*i + use_odd + n_elts));
14947 tree permute = elts.build ();
14949 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
14950 gimple_set_location (g, gimple_location (stmt));
14951 gsi_replace (gsi, g, true);
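/* Worked example (annotation): for V4SI, vec_mergee (use_odd == 0)
   builds the permute vector {0, 4, 2, 6}, and vec_mergeo (use_odd == 1)
   builds {1, 5, 3, 7}, selecting the even or odd elements of the
   concatenated arg0/arg1 pair.  */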
14954 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
14955 a constant, use rs6000_fold_builtin.) */
14958 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
14960 gimple *stmt = gsi_stmt (*gsi);
14961 tree fndecl = gimple_call_fndecl (stmt);
14962 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
14963 enum rs6000_builtins fn_code
14964 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14965 tree arg0, arg1, lhs, temp;
14966 enum tree_code bcode;
14969 size_t uns_fncode = (size_t) fn_code;
14970 enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
14971 const char *fn_name1 = rs6000_builtin_info[uns_fncode].name;
14972 const char *fn_name2 = (icode != CODE_FOR_nothing)
14973 ? get_insn_name ((int) icode)
14976 if (TARGET_DEBUG_BUILTIN)
14977 fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
14978 fn_code, fn_name1, fn_name2);
14980 if (!rs6000_fold_gimple)
14983 /* Prevent gimple folding for code that does not have a LHS, unless it is
14984 allowed per the rs6000_builtin_valid_without_lhs helper function. */
14985 if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
14988 /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */
14989 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fncode].mask;
14990 bool func_valid_p = (rs6000_builtin_mask & mask) == mask;
14996 /* Flavors of vec_add. We deliberately don't expand
14997 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
14998 TImode, resulting in much poorer code generation. */
14999 case ALTIVEC_BUILTIN_VADDUBM:
15000 case ALTIVEC_BUILTIN_VADDUHM:
15001 case ALTIVEC_BUILTIN_VADDUWM:
15002 case P8V_BUILTIN_VADDUDM:
15003 case ALTIVEC_BUILTIN_VADDFP:
15004 case VSX_BUILTIN_XVADDDP:
15007 arg0 = gimple_call_arg (stmt, 0);
15008 arg1 = gimple_call_arg (stmt, 1);
15009 lhs = gimple_call_lhs (stmt);
15010 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (lhs)))
15011 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (lhs))))
15013 /* Ensure the binary operation is performed in a type
15014 that wraps if it is an integral type. */
15015 gimple_seq stmts = NULL;
15016 tree type = unsigned_type_for (TREE_TYPE (lhs));
15017 tree uarg0 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15019 tree uarg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15021 tree res = gimple_build (&stmts, gimple_location (stmt), bcode,
15022 type, uarg0, uarg1);
15023 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15024 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR,
15025 build1 (VIEW_CONVERT_EXPR,
15026 TREE_TYPE (lhs), res));
15027 gsi_replace (gsi, g, true);
15030 g = gimple_build_assign (lhs, bcode, arg0, arg1);
15031 gimple_set_location (g, gimple_location (stmt));
15032 gsi_replace (gsi, g, true);
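/* Annotation: the VIEW_CONVERT_EXPR detour above exists because signed
   overflow is undefined in GIMPLE while the vector add/subtract
   instructions wrap.  In sketch form, a V4SI vec_add becomes:
     u0  = VIEW_CONVERT_EXPR<vector unsigned int>(arg0);
     u1  = VIEW_CONVERT_EXPR<vector unsigned int>(arg1);
     r   = u0 + u1;
     lhs = VIEW_CONVERT_EXPR<vector signed int>(r);  */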
15034 /* Flavors of vec_sub. We deliberately don't expand
15035 P8V_BUILTIN_VSUBUQM. */
15036 case ALTIVEC_BUILTIN_VSUBUBM:
15037 case ALTIVEC_BUILTIN_VSUBUHM:
15038 case ALTIVEC_BUILTIN_VSUBUWM:
15039 case P8V_BUILTIN_VSUBUDM:
15040 case ALTIVEC_BUILTIN_VSUBFP:
15041 case VSX_BUILTIN_XVSUBDP:
15042 bcode = MINUS_EXPR;
15044 case VSX_BUILTIN_XVMULSP:
15045 case VSX_BUILTIN_XVMULDP:
15046 arg0 = gimple_call_arg (stmt, 0);
15047 arg1 = gimple_call_arg (stmt, 1);
15048 lhs = gimple_call_lhs (stmt);
15049 g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
15050 gimple_set_location (g, gimple_location (stmt));
15051 gsi_replace (gsi, g, true);
15053 /* Even element flavors of vec_mul (signed). */
15054 case ALTIVEC_BUILTIN_VMULESB:
15055 case ALTIVEC_BUILTIN_VMULESH:
15056 case P8V_BUILTIN_VMULESW:
15057 /* Even element flavors of vec_mul (unsigned). */
15058 case ALTIVEC_BUILTIN_VMULEUB:
15059 case ALTIVEC_BUILTIN_VMULEUH:
15060 case P8V_BUILTIN_VMULEUW:
15061 arg0 = gimple_call_arg (stmt, 0);
15062 arg1 = gimple_call_arg (stmt, 1);
15063 lhs = gimple_call_lhs (stmt);
15064 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
15065 gimple_set_location (g, gimple_location (stmt));
15066 gsi_replace (gsi, g, true);
15068 /* Odd element flavors of vec_mul (signed). */
15069 case ALTIVEC_BUILTIN_VMULOSB:
15070 case ALTIVEC_BUILTIN_VMULOSH:
15071 case P8V_BUILTIN_VMULOSW:
15072 /* Odd element flavors of vec_mul (unsigned). */
15073 case ALTIVEC_BUILTIN_VMULOUB:
15074 case ALTIVEC_BUILTIN_VMULOUH:
15075 case P8V_BUILTIN_VMULOUW:
15076 arg0 = gimple_call_arg (stmt, 0);
15077 arg1 = gimple_call_arg (stmt, 1);
15078 lhs = gimple_call_lhs (stmt);
15079 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
15080 gimple_set_location (g, gimple_location (stmt));
15081 gsi_replace (gsi, g, true);
15083 /* Flavors of vec_div (Integer). */
15084 case VSX_BUILTIN_DIV_V2DI:
15085 case VSX_BUILTIN_UDIV_V2DI:
15086 arg0 = gimple_call_arg (stmt, 0);
15087 arg1 = gimple_call_arg (stmt, 1);
15088 lhs = gimple_call_lhs (stmt);
15089 g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
15090 gimple_set_location (g, gimple_location (stmt));
15091 gsi_replace (gsi, g, true);
15093 /* Flavors of vec_div (Float). */
15094 case VSX_BUILTIN_XVDIVSP:
15095 case VSX_BUILTIN_XVDIVDP:
15096 arg0 = gimple_call_arg (stmt, 0);
15097 arg1 = gimple_call_arg (stmt, 1);
15098 lhs = gimple_call_lhs (stmt);
15099 g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
15100 gimple_set_location (g, gimple_location (stmt));
15101 gsi_replace (gsi, g, true);
15103 /* Flavors of vec_and. */
15104 case ALTIVEC_BUILTIN_VAND:
15105 arg0 = gimple_call_arg (stmt, 0);
15106 arg1 = gimple_call_arg (stmt, 1);
15107 lhs = gimple_call_lhs (stmt);
15108 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
15109 gimple_set_location (g, gimple_location (stmt));
15110 gsi_replace (gsi, g, true);
15112 /* Flavors of vec_andc. */
15113 case ALTIVEC_BUILTIN_VANDC:
15114 arg0 = gimple_call_arg (stmt, 0);
15115 arg1 = gimple_call_arg (stmt, 1);
15116 lhs = gimple_call_lhs (stmt);
15117 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15118 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15119 gimple_set_location (g, gimple_location (stmt));
15120 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15121 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
15122 gimple_set_location (g, gimple_location (stmt));
15123 gsi_replace (gsi, g, true);
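/* Annotation: the andc fold above and the nand/orc/nor/eqv folds below
   all share this two-statement shape: materialize the inner bitwise
   result in a fresh temporary, then emit the outer operation into the
   lhs.  */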
15125 /* Flavors of vec_nand. */
15126 case P8V_BUILTIN_VEC_NAND:
15127 case P8V_BUILTIN_NAND_V16QI:
15128 case P8V_BUILTIN_NAND_V8HI:
15129 case P8V_BUILTIN_NAND_V4SI:
15130 case P8V_BUILTIN_NAND_V4SF:
15131 case P8V_BUILTIN_NAND_V2DF:
15132 case P8V_BUILTIN_NAND_V2DI:
15133 arg0 = gimple_call_arg (stmt, 0);
15134 arg1 = gimple_call_arg (stmt, 1);
15135 lhs = gimple_call_lhs (stmt);
15136 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15137 g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
15138 gimple_set_location (g, gimple_location (stmt));
15139 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15140 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15141 gimple_set_location (g, gimple_location (stmt));
15142 gsi_replace (gsi, g, true);
15144 /* Flavors of vec_or. */
15145 case ALTIVEC_BUILTIN_VOR:
15146 arg0 = gimple_call_arg (stmt, 0);
15147 arg1 = gimple_call_arg (stmt, 1);
15148 lhs = gimple_call_lhs (stmt);
15149 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
15150 gimple_set_location (g, gimple_location (stmt));
15151 gsi_replace (gsi, g, true);
15153 /* Flavors of vec_orc. */
15154 case P8V_BUILTIN_ORC_V16QI:
15155 case P8V_BUILTIN_ORC_V8HI:
15156 case P8V_BUILTIN_ORC_V4SI:
15157 case P8V_BUILTIN_ORC_V4SF:
15158 case P8V_BUILTIN_ORC_V2DF:
15159 case P8V_BUILTIN_ORC_V2DI:
15160 arg0 = gimple_call_arg (stmt, 0);
15161 arg1 = gimple_call_arg (stmt, 1);
15162 lhs = gimple_call_lhs (stmt);
15163 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15164 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15165 gimple_set_location (g, gimple_location (stmt));
15166 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15167 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
15168 gimple_set_location (g, gimple_location (stmt));
15169 gsi_replace (gsi, g, true);
15171 /* Flavors of vec_xor. */
15172 case ALTIVEC_BUILTIN_VXOR:
15173 arg0 = gimple_call_arg (stmt, 0);
15174 arg1 = gimple_call_arg (stmt, 1);
15175 lhs = gimple_call_lhs (stmt);
15176 g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
15177 gimple_set_location (g, gimple_location (stmt));
15178 gsi_replace (gsi, g, true);
15180 /* Flavors of vec_nor. */
15181 case ALTIVEC_BUILTIN_VNOR:
15182 arg0 = gimple_call_arg (stmt, 0);
15183 arg1 = gimple_call_arg (stmt, 1);
15184 lhs = gimple_call_lhs (stmt);
15185 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15186 g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
15187 gimple_set_location (g, gimple_location (stmt));
15188 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15189 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15190 gimple_set_location (g, gimple_location (stmt));
15191 gsi_replace (gsi, g, true);
15193 /* Flavors of vec_abs. */
15194 case ALTIVEC_BUILTIN_ABS_V16QI:
15195 case ALTIVEC_BUILTIN_ABS_V8HI:
15196 case ALTIVEC_BUILTIN_ABS_V4SI:
15197 case ALTIVEC_BUILTIN_ABS_V4SF:
15198 case P8V_BUILTIN_ABS_V2DI:
15199 case VSX_BUILTIN_XVABSDP:
15200 arg0 = gimple_call_arg (stmt, 0);
15201 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
15202 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
15204 lhs = gimple_call_lhs (stmt);
15205 g = gimple_build_assign (lhs, ABS_EXPR, arg0);
15206 gimple_set_location (g, gimple_location (stmt));
15207 gsi_replace (gsi, g, true);
15209 /* Flavors of vec_min. */
15210 case VSX_BUILTIN_XVMINDP:
15211 case P8V_BUILTIN_VMINSD:
15212 case P8V_BUILTIN_VMINUD:
15213 case ALTIVEC_BUILTIN_VMINSB:
15214 case ALTIVEC_BUILTIN_VMINSH:
15215 case ALTIVEC_BUILTIN_VMINSW:
15216 case ALTIVEC_BUILTIN_VMINUB:
15217 case ALTIVEC_BUILTIN_VMINUH:
15218 case ALTIVEC_BUILTIN_VMINUW:
15219 case ALTIVEC_BUILTIN_VMINFP:
15220 arg0 = gimple_call_arg (stmt, 0);
15221 arg1 = gimple_call_arg (stmt, 1);
15222 lhs = gimple_call_lhs (stmt);
15223 g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
15224 gimple_set_location (g, gimple_location (stmt));
15225 gsi_replace (gsi, g, true);
15227 /* Flavors of vec_max. */
15228 case VSX_BUILTIN_XVMAXDP:
15229 case P8V_BUILTIN_VMAXSD:
15230 case P8V_BUILTIN_VMAXUD:
15231 case ALTIVEC_BUILTIN_VMAXSB:
15232 case ALTIVEC_BUILTIN_VMAXSH:
15233 case ALTIVEC_BUILTIN_VMAXSW:
15234 case ALTIVEC_BUILTIN_VMAXUB:
15235 case ALTIVEC_BUILTIN_VMAXUH:
15236 case ALTIVEC_BUILTIN_VMAXUW:
15237 case ALTIVEC_BUILTIN_VMAXFP:
15238 arg0 = gimple_call_arg (stmt, 0);
15239 arg1 = gimple_call_arg (stmt, 1);
15240 lhs = gimple_call_lhs (stmt);
15241 g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
15242 gimple_set_location (g, gimple_location (stmt));
15243 gsi_replace (gsi, g, true);
15245 /* Flavors of vec_eqv. */
15246 case P8V_BUILTIN_EQV_V16QI:
15247 case P8V_BUILTIN_EQV_V8HI:
15248 case P8V_BUILTIN_EQV_V4SI:
15249 case P8V_BUILTIN_EQV_V4SF:
15250 case P8V_BUILTIN_EQV_V2DF:
15251 case P8V_BUILTIN_EQV_V2DI:
15252 arg0 = gimple_call_arg (stmt, 0);
15253 arg1 = gimple_call_arg (stmt, 1);
15254 lhs = gimple_call_lhs (stmt);
15255 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15256 g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
15257 gimple_set_location (g, gimple_location (stmt));
15258 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15259 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15260 gimple_set_location (g, gimple_location (stmt));
15261 gsi_replace (gsi, g, true);
15263 /* Flavors of vec_rotate_left. */
15264 case ALTIVEC_BUILTIN_VRLB:
15265 case ALTIVEC_BUILTIN_VRLH:
15266 case ALTIVEC_BUILTIN_VRLW:
15267 case P8V_BUILTIN_VRLD:
15268 arg0 = gimple_call_arg (stmt, 0);
15269 arg1 = gimple_call_arg (stmt, 1);
15270 lhs = gimple_call_lhs (stmt);
15271 g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
15272 gimple_set_location (g, gimple_location (stmt));
15273 gsi_replace (gsi, g, true);
15275 /* Flavors of vector shift right algebraic.
15276 vec_sra{b,h,w} -> vsra{b,h,w}. */
15277 case ALTIVEC_BUILTIN_VSRAB:
15278 case ALTIVEC_BUILTIN_VSRAH:
15279 case ALTIVEC_BUILTIN_VSRAW:
15280 case P8V_BUILTIN_VSRAD:
15282 arg0 = gimple_call_arg (stmt, 0);
15283 arg1 = gimple_call_arg (stmt, 1);
15284 lhs = gimple_call_lhs (stmt);
15285 tree arg1_type = TREE_TYPE (arg1);
15286 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15287 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15288 location_t loc = gimple_location (stmt);
15289 /* Force arg1 into the range of shift amounts that is valid for the arg0 type. */
15290 /* Build a vector holding the element size in bits, to use as the modulo divisor. */
15291 int n_elts = VECTOR_CST_NELTS (arg1);
15292 tree element_size = build_int_cst (unsigned_element_type,
15294 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
15295 for (int i = 0; i < n_elts; i++)
15296 elts.safe_push (element_size);
15297 tree modulo_tree = elts.build ();
15298 /* Modulo the provided shift value against that vector. */
15299 gimple_seq stmts = NULL;
15300 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15301 unsigned_arg1_type, arg1);
15302 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15303 unsigned_arg1_type, unsigned_arg1,
15305 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15306 /* And finally, do the shift. */
15307 g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, new_arg1);
15308 gimple_set_location (g, loc);
15309 gsi_replace (gsi, g, true);
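/* Worked example (annotation): for V4SI, element_size is 32, so a
   shift vector of {40, 33, 2, 1} is reduced modulo 32 to {8, 1, 2, 1}
   before the RSHIFT_EXPR is emitted, matching vsraw's use of only the
   low-order bits of each shift count.  */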
15312 /* Flavors of vector shift left.
15313 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
15314 case ALTIVEC_BUILTIN_VSLB:
15315 case ALTIVEC_BUILTIN_VSLH:
15316 case ALTIVEC_BUILTIN_VSLW:
15317 case P8V_BUILTIN_VSLD:
15320 gimple_seq stmts = NULL;
15321 arg0 = gimple_call_arg (stmt, 0);
15322 tree arg0_type = TREE_TYPE (arg0);
15323 if (INTEGRAL_TYPE_P (TREE_TYPE (arg0_type))
15324 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0_type)))
15326 arg1 = gimple_call_arg (stmt, 1);
15327 tree arg1_type = TREE_TYPE (arg1);
15328 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15329 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15330 loc = gimple_location (stmt);
15331 lhs = gimple_call_lhs (stmt);
15332 /* Force arg1 into the range of shift amounts that is valid for the arg0 type. */
15333 /* Build a vector holding the element size in bits, to use as the modulo divisor. */
15334 int n_elts = VECTOR_CST_NELTS (arg1);
15335 int tree_size_in_bits = TREE_INT_CST_LOW (size_in_bytes (arg1_type))
15337 tree element_size = build_int_cst (unsigned_element_type,
15338 tree_size_in_bits / n_elts);
15339 tree_vector_builder elts (unsigned_type_for (arg1_type), n_elts, 1);
15340 for (int i = 0; i < n_elts; i++)
15341 elts.safe_push (element_size);
15342 tree modulo_tree = elts.build ();
15343 /* Modulo the provided shift value against that vector. */
15344 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15345 unsigned_arg1_type, arg1);
15346 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15347 unsigned_arg1_type, unsigned_arg1,
15349 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15350 /* And finally, do the shift. */
15351 g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, new_arg1);
15352 gimple_set_location (g, gimple_location (stmt));
15353 gsi_replace (gsi, g, true);
15356 /* Flavors of vector shift right. */
15357 case ALTIVEC_BUILTIN_VSRB:
15358 case ALTIVEC_BUILTIN_VSRH:
15359 case ALTIVEC_BUILTIN_VSRW:
15360 case P8V_BUILTIN_VSRD:
15362 arg0 = gimple_call_arg (stmt, 0);
15363 arg1 = gimple_call_arg (stmt, 1);
15364 lhs = gimple_call_lhs (stmt);
15365 tree arg1_type = TREE_TYPE (arg1);
15366 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15367 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15368 location_t loc = gimple_location (stmt);
15369 gimple_seq stmts = NULL;
15370 /* Convert arg0 to unsigned. */
15372 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15373 unsigned_type_for (TREE_TYPE (arg0)), arg0);
15374 /* Force arg1 into the range of shift amounts that is valid for the arg0 type. */
15375 /* Build a vector holding the element size in bits, to use as the modulo divisor. */
15376 int n_elts = VECTOR_CST_NELTS (arg1);
15377 tree element_size = build_int_cst (unsigned_element_type,
15379 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
15380 for (int i = 0; i < n_elts; i++)
15381 elts.safe_push (element_size);
15382 tree modulo_tree = elts.build ();
15383 /* Modulo the provided shift value against that vector. */
15384 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15385 unsigned_arg1_type, arg1);
15386 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15387 unsigned_arg1_type, unsigned_arg1,
15389 /* Do the shift. */
15391 = gimple_build (&stmts, RSHIFT_EXPR,
15392 TREE_TYPE (arg0_unsigned), arg0_unsigned, new_arg1);
15393 /* Convert result back to the lhs type. */
15394 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
15395 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15396 update_call_from_tree (gsi, res);
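/* Annotation: the round trip through the unsigned type above is what
   makes this shift logical (zero-filling); RSHIFT_EXPR on a signed
   operand would shift arithmetically instead.  */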
15399 /* Vector loads. */
15400 case ALTIVEC_BUILTIN_LVX_V16QI:
15401 case ALTIVEC_BUILTIN_LVX_V8HI:
15402 case ALTIVEC_BUILTIN_LVX_V4SI:
15403 case ALTIVEC_BUILTIN_LVX_V4SF:
15404 case ALTIVEC_BUILTIN_LVX_V2DI:
15405 case ALTIVEC_BUILTIN_LVX_V2DF:
15406 case ALTIVEC_BUILTIN_LVX_V1TI:
15408 arg0 = gimple_call_arg (stmt, 0); // offset
15409 arg1 = gimple_call_arg (stmt, 1); // address
15410 lhs = gimple_call_lhs (stmt);
15411 location_t loc = gimple_location (stmt);
15412 /* Since arg1 may be cast to a different type, just use ptr_type_node
15413 here instead of trying to enforce TBAA on pointer types. */
15414 tree arg1_type = ptr_type_node;
15415 tree lhs_type = TREE_TYPE (lhs);
15416 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15417 the tree using the value from arg0. The resulting type will match
15418 the type of arg1. */
15419 gimple_seq stmts = NULL;
15420 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15421 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15422 arg1_type, arg1, temp_offset);
15423 /* Mask off any lower bits from the address. */
15424 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15425 arg1_type, temp_addr,
15426 build_int_cst (arg1_type, -16));
15427 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15428 if (!is_gimple_mem_ref_addr (aligned_addr))
15430 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
15431 gimple *g = gimple_build_assign (t, aligned_addr);
15432 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15435 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15436 take an offset, but since we've already incorporated the offset
15437 above, here we just pass in a zero. */
15439 = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
15440 build_int_cst (arg1_type, 0)));
15441 gimple_set_location (g, loc);
15442 gsi_replace (gsi, g, true);
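/* Sketch (annotation) of the folded form, with types elided:
     addr = arg1 + (sizetype) arg0;
     addr = addr & -16;            -- lvx ignores the low 4 address bits
     lhs  = MEM[(vector_type *) addr];  */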
15445 /* Vector stores. */
15446 case ALTIVEC_BUILTIN_STVX_V16QI:
15447 case ALTIVEC_BUILTIN_STVX_V8HI:
15448 case ALTIVEC_BUILTIN_STVX_V4SI:
15449 case ALTIVEC_BUILTIN_STVX_V4SF:
15450 case ALTIVEC_BUILTIN_STVX_V2DI:
15451 case ALTIVEC_BUILTIN_STVX_V2DF:
15453 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15454 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15455 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15456 location_t loc = gimple_location (stmt);
15457 tree arg0_type = TREE_TYPE (arg0);
15458 /* Use ptr_type_node (no TBAA) for the arg2_type.
15459 FIXME: (Richard) "A proper fix would be to transition this type as
15460 seen from the frontend to GIMPLE, for example in a similar way we
15461 do for MEM_REFs by piggy-backing that on an extra argument, a
15462 constant zero pointer of the alias pointer type to use (which would
15463 also serve as a type indicator of the store itself). I'd use a
15464 target specific internal function for this (not sure if we can have
15465 those target specific, but I guess if it's folded away then that's
15466 fine) and get away with the overload set." */
15467 tree arg2_type = ptr_type_node;
15468 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15469 the tree using the value from arg1. The resulting type will match
15470 the type of arg2. */
15471 gimple_seq stmts = NULL;
15472 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15473 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15474 arg2_type, arg2, temp_offset);
15475 /* Mask off any lower bits from the address. */
15476 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15477 arg2_type, temp_addr,
15478 build_int_cst (arg2_type, -16));
15479 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15480 if (!is_gimple_mem_ref_addr (aligned_addr))
15482 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
15483 gimple *g = gimple_build_assign (t, aligned_addr);
15484 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15487 /* The desired gimple result should be similar to:
15488 MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */
15490 = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
15491 build_int_cst (arg2_type, 0)), arg0);
15492 gimple_set_location (g, loc);
15493 gsi_replace (gsi, g, true);
15497 /* Unaligned vector loads. */
15498 case VSX_BUILTIN_LXVW4X_V16QI:
15499 case VSX_BUILTIN_LXVW4X_V8HI:
15500 case VSX_BUILTIN_LXVW4X_V4SF:
15501 case VSX_BUILTIN_LXVW4X_V4SI:
15502 case VSX_BUILTIN_LXVD2X_V2DF:
15503 case VSX_BUILTIN_LXVD2X_V2DI:
15505 arg0 = gimple_call_arg (stmt, 0); // offset
15506 arg1 = gimple_call_arg (stmt, 1); // address
15507 lhs = gimple_call_lhs (stmt);
15508 location_t loc = gimple_location (stmt);
15509 /* Since arg1 may be cast to a different type, just use ptr_type_node
15510 here instead of trying to enforce TBAA on pointer types. */
15511 tree arg1_type = ptr_type_node;
15512 tree lhs_type = TREE_TYPE (lhs);
15513 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
15514 required alignment (on Power) is 4 bytes regardless of data type. */
15515 tree align_ltype = build_aligned_type (lhs_type, 4);
15516 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15517 the tree using the value from arg0. The resulting type will match
15518 the type of arg1. */
15519 gimple_seq stmts = NULL;
15520 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15521 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15522 arg1_type, arg1, temp_offset);
15523 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15524 if (!is_gimple_mem_ref_addr (temp_addr))
15526 tree t = make_ssa_name (TREE_TYPE (temp_addr));
15527 gimple *g = gimple_build_assign (t, temp_addr);
15528 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15531 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15532 take an offset, but since we've already incorporated the offset
15533 above, here we just pass in a zero. */
15535 g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
15536 build_int_cst (arg1_type, 0)));
15537 gimple_set_location (g, loc);
15538 gsi_replace (gsi, g, true);
15542 /* Unaligned vector stores. */
15543 case VSX_BUILTIN_STXVW4X_V16QI:
15544 case VSX_BUILTIN_STXVW4X_V8HI:
15545 case VSX_BUILTIN_STXVW4X_V4SF:
15546 case VSX_BUILTIN_STXVW4X_V4SI:
15547 case VSX_BUILTIN_STXVD2X_V2DF:
15548 case VSX_BUILTIN_STXVD2X_V2DI:
15550 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15551 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15552 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15553 location_t loc = gimple_location (stmt);
15554 tree arg0_type = TREE_TYPE (arg0);
15555 /* Use ptr_type_node (no TBAA) for the arg2_type. */
15556 tree arg2_type = ptr_type_node;
15557 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
15558 required alignment (on Power) is 4 bytes regardless of data type. */
15559 tree align_stype = build_aligned_type (arg0_type, 4);
15560 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15561 the tree using the value from arg1. */
15562 gimple_seq stmts = NULL;
15563 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15564 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15565 arg2_type, arg2, temp_offset);
15566 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15567 if (!is_gimple_mem_ref_addr (temp_addr))
15569 tree t = make_ssa_name (TREE_TYPE (temp_addr));
15570 gimple *g = gimple_build_assign (t, temp_addr);
15571 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15575 g = gimple_build_assign (build2 (MEM_REF, align_stype, temp_addr,
15576 build_int_cst (arg2_type, 0)), arg0);
15577 gimple_set_location (g, loc);
15578 gsi_replace (gsi, g, true);
15582 /* Vector Fused multiply-add (fma). */
15583 case ALTIVEC_BUILTIN_VMADDFP:
15584 case VSX_BUILTIN_XVMADDDP:
15585 case ALTIVEC_BUILTIN_VMLADDUHM:
15587 arg0 = gimple_call_arg (stmt, 0);
15588 arg1 = gimple_call_arg (stmt, 1);
15589 tree arg2 = gimple_call_arg (stmt, 2);
15590 lhs = gimple_call_lhs (stmt);
15591 gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2);
15592 gimple_call_set_lhs (g, lhs);
15593 gimple_call_set_nothrow (g, true);
15594 gimple_set_location (g, gimple_location (stmt));
15595 gsi_replace (gsi, g, true);
15599 /* Vector compares; EQ, NE, GE, GT, LE. */
15600 case ALTIVEC_BUILTIN_VCMPEQUB:
15601 case ALTIVEC_BUILTIN_VCMPEQUH:
15602 case ALTIVEC_BUILTIN_VCMPEQUW:
15603 case P8V_BUILTIN_VCMPEQUD:
15604 fold_compare_helper (gsi, EQ_EXPR, stmt);
15607 case P9V_BUILTIN_CMPNEB:
15608 case P9V_BUILTIN_CMPNEH:
15609 case P9V_BUILTIN_CMPNEW:
15610 fold_compare_helper (gsi, NE_EXPR, stmt);
15613 case VSX_BUILTIN_CMPGE_16QI:
15614 case VSX_BUILTIN_CMPGE_U16QI:
15615 case VSX_BUILTIN_CMPGE_8HI:
15616 case VSX_BUILTIN_CMPGE_U8HI:
15617 case VSX_BUILTIN_CMPGE_4SI:
15618 case VSX_BUILTIN_CMPGE_U4SI:
15619 case VSX_BUILTIN_CMPGE_2DI:
15620 case VSX_BUILTIN_CMPGE_U2DI:
15621 fold_compare_helper (gsi, GE_EXPR, stmt);
15624 case ALTIVEC_BUILTIN_VCMPGTSB:
15625 case ALTIVEC_BUILTIN_VCMPGTUB:
15626 case ALTIVEC_BUILTIN_VCMPGTSH:
15627 case ALTIVEC_BUILTIN_VCMPGTUH:
15628 case ALTIVEC_BUILTIN_VCMPGTSW:
15629 case ALTIVEC_BUILTIN_VCMPGTUW:
15630 case P8V_BUILTIN_VCMPGTUD:
15631 case P8V_BUILTIN_VCMPGTSD:
15632 fold_compare_helper (gsi, GT_EXPR, stmt);
15635 case VSX_BUILTIN_CMPLE_16QI:
15636 case VSX_BUILTIN_CMPLE_U16QI:
15637 case VSX_BUILTIN_CMPLE_8HI:
15638 case VSX_BUILTIN_CMPLE_U8HI:
15639 case VSX_BUILTIN_CMPLE_4SI:
15640 case VSX_BUILTIN_CMPLE_U4SI:
15641 case VSX_BUILTIN_CMPLE_2DI:
15642 case VSX_BUILTIN_CMPLE_U2DI:
15643 fold_compare_helper (gsi, LE_EXPR, stmt);
15646 /* Flavors of vec_splat_[us]{8,16,32}. */
15647 case ALTIVEC_BUILTIN_VSPLTISB:
15648 case ALTIVEC_BUILTIN_VSPLTISH:
15649 case ALTIVEC_BUILTIN_VSPLTISW:
15651 arg0 = gimple_call_arg (stmt, 0);
15652 lhs = gimple_call_lhs (stmt);
15654 /* Only fold the vec_splat_*() if the low-order bits of arg0 form a
15655 5-bit signed constant in the range -16 to +15. */
15656 if (TREE_CODE (arg0) != INTEGER_CST
15657 || !IN_RANGE (TREE_INT_CST_LOW (arg0), -16, 15))
15659 gimple_seq stmts = NULL;
15660 location_t loc = gimple_location (stmt);
15661 tree splat_value = gimple_convert (&stmts, loc,
15662 TREE_TYPE (TREE_TYPE (lhs)), arg0);
15663 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15664 tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
15665 g = gimple_build_assign (lhs, splat_tree);
15666 gimple_set_location (g, gimple_location (stmt));
15667 gsi_replace (gsi, g, true);
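/* Illustrative example (annotation): vec_splat_s32 (-5) is folded by
   the code above into the constant vector {-5, -5, -5, -5}.  */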
15671 /* Flavors of vec_splat. */
15672 /* a = vec_splat (b, 0x3) becomes a = { b[3],b[3],b[3],...}; */
15673 case ALTIVEC_BUILTIN_VSPLTB:
15674 case ALTIVEC_BUILTIN_VSPLTH:
15675 case ALTIVEC_BUILTIN_VSPLTW:
15676 case VSX_BUILTIN_XXSPLTD_V2DI:
15677 case VSX_BUILTIN_XXSPLTD_V2DF:
15679 arg0 = gimple_call_arg (stmt, 0); /* input vector. */
15680 arg1 = gimple_call_arg (stmt, 1); /* index into arg0. */
15681 /* Only fold the vec_splat () if arg1 is a constant value and a
15682 valid index into the arg0 vector. */
15683 unsigned int n_elts = VECTOR_CST_NELTS (arg0);
15684 if (TREE_CODE (arg1) != INTEGER_CST
15685 || TREE_INT_CST_LOW (arg1) > (n_elts - 1))
15687 lhs = gimple_call_lhs (stmt);
15688 tree lhs_type = TREE_TYPE (lhs);
15689 tree arg0_type = TREE_TYPE (arg0);
15691 if (TREE_CODE (arg0) == VECTOR_CST)
15692 splat = VECTOR_CST_ELT (arg0, TREE_INT_CST_LOW (arg1));
15695 /* Determine (in bits) the length and start location of the
15696 splat value for a call to the tree_vec_extract helper. */
15697 int splat_elem_size = TREE_INT_CST_LOW (size_in_bytes (arg0_type))
15698 * BITS_PER_UNIT / n_elts;
15699 int splat_start_bit = TREE_INT_CST_LOW (arg1) * splat_elem_size;
15700 tree len = build_int_cst (bitsizetype, splat_elem_size);
15701 tree start = build_int_cst (bitsizetype, splat_start_bit);
15702 splat = tree_vec_extract (gsi, TREE_TYPE (lhs_type), arg0,
15705 /* And finally, build the new vector. */
15706 tree splat_tree = build_vector_from_val (lhs_type, splat);
15707 g = gimple_build_assign (lhs, splat_tree);
15708 gimple_set_location (g, gimple_location (stmt));
15709 gsi_replace (gsi, g, true);
15713 /* vec_mergel (integral and float types). */
15714 case ALTIVEC_BUILTIN_VMRGLH:
15715 case ALTIVEC_BUILTIN_VMRGLW:
15716 case VSX_BUILTIN_XXMRGLW_4SI:
15717 case ALTIVEC_BUILTIN_VMRGLB:
15718 case VSX_BUILTIN_VEC_MERGEL_V2DI:
15719 case VSX_BUILTIN_XXMRGLW_4SF:
15720 case VSX_BUILTIN_VEC_MERGEL_V2DF:
15721 fold_mergehl_helper (gsi, stmt, 1);
15723 /* vec_mergeh (integral and float types). */
15724 case ALTIVEC_BUILTIN_VMRGHH:
15725 case ALTIVEC_BUILTIN_VMRGHW:
15726 case VSX_BUILTIN_XXMRGHW_4SI:
15727 case ALTIVEC_BUILTIN_VMRGHB:
15728 case VSX_BUILTIN_VEC_MERGEH_V2DI:
15729 case VSX_BUILTIN_XXMRGHW_4SF:
15730 case VSX_BUILTIN_VEC_MERGEH_V2DF:
15731 fold_mergehl_helper (gsi, stmt, 0);
15734 /* Flavors of vec_mergee. */
15735 case P8V_BUILTIN_VMRGEW_V4SI:
15736 case P8V_BUILTIN_VMRGEW_V2DI:
15737 case P8V_BUILTIN_VMRGEW_V4SF:
15738 case P8V_BUILTIN_VMRGEW_V2DF:
15739 fold_mergeeo_helper (gsi, stmt, 0);
15741 /* Flavors of vec_mergeo. */
15742 case P8V_BUILTIN_VMRGOW_V4SI:
15743 case P8V_BUILTIN_VMRGOW_V2DI:
15744 case P8V_BUILTIN_VMRGOW_V4SF:
15745 case P8V_BUILTIN_VMRGOW_V2DF:
15746 fold_mergeeo_helper (gsi, stmt, 1);
15749 /* d = vec_pack (a, b) */
15750 case P8V_BUILTIN_VPKUDUM:
15751 case ALTIVEC_BUILTIN_VPKUHUM:
15752 case ALTIVEC_BUILTIN_VPKUWUM:
15754 arg0 = gimple_call_arg (stmt, 0);
15755 arg1 = gimple_call_arg (stmt, 1);
15756 lhs = gimple_call_lhs (stmt);
15757 gimple *g = gimple_build_assign (lhs, VEC_PACK_TRUNC_EXPR, arg0, arg1);
15758 gimple_set_location (g, gimple_location (stmt));
15759 gsi_replace (gsi, g, true);
15763 /* d = vec_unpackh (a) */
15764 /* Note that the UNPACK_{HI,LO}_EXPR used in the gimple_build_assign call
15765 in this code is sensitive to endianness, and needs to be inverted to
15766 handle both LE and BE targets. */
15767 case ALTIVEC_BUILTIN_VUPKHSB:
15768 case ALTIVEC_BUILTIN_VUPKHSH:
15769 case P8V_BUILTIN_VUPKHSW:
15771 arg0 = gimple_call_arg (stmt, 0);
15772 lhs = gimple_call_lhs (stmt);
15773 if (BYTES_BIG_ENDIAN)
15774 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
15776 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
15777 gimple_set_location (g, gimple_location (stmt));
15778 gsi_replace (gsi, g, true);
15781 /* d = vec_unpackl (a) */
15782 case ALTIVEC_BUILTIN_VUPKLSB:
15783 case ALTIVEC_BUILTIN_VUPKLSH:
15784 case P8V_BUILTIN_VUPKLSW:
15786 arg0 = gimple_call_arg (stmt, 0);
15787 lhs = gimple_call_lhs (stmt);
15788 if (BYTES_BIG_ENDIAN)
15789 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
15791 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
15792 gimple_set_location (g, gimple_location (stmt));
15793 gsi_replace (gsi, g, true);
15796 /* There is no gimple type corresponding with pixel, so just return. */
15797 case ALTIVEC_BUILTIN_VUPKHPX:
15798 case ALTIVEC_BUILTIN_VUPKLPX:
15802 case ALTIVEC_BUILTIN_VPERM_16QI:
15803 case ALTIVEC_BUILTIN_VPERM_8HI:
15804 case ALTIVEC_BUILTIN_VPERM_4SI:
15805 case ALTIVEC_BUILTIN_VPERM_2DI:
15806 case ALTIVEC_BUILTIN_VPERM_4SF:
15807 case ALTIVEC_BUILTIN_VPERM_2DF:
15809 arg0 = gimple_call_arg (stmt, 0);
15810 arg1 = gimple_call_arg (stmt, 1);
15811 tree permute = gimple_call_arg (stmt, 2);
15812 lhs = gimple_call_lhs (stmt);
15813 location_t loc = gimple_location (stmt);
15814 gimple_seq stmts = NULL;
15815 // Convert arg0 and arg1 to match the type of the permute
15816 // for the VEC_PERM_EXPR operation.
15817 tree permute_type = (TREE_TYPE (permute));
15818 tree arg0_ptype = gimple_convert (&stmts, loc, permute_type, arg0);
15819 tree arg1_ptype = gimple_convert (&stmts, loc, permute_type, arg1);
15820 tree lhs_ptype = gimple_build (&stmts, loc, VEC_PERM_EXPR,
15821 permute_type, arg0_ptype, arg1_ptype,
15823 // Convert the result back to the desired lhs type upon completion.
15824 tree temp = gimple_convert (&stmts, loc, TREE_TYPE (lhs), lhs_ptype);
15825 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15826 g = gimple_build_assign (lhs, temp);
15827 gimple_set_location (g, loc);
15828 gsi_replace (gsi, g, true);
15833 if (TARGET_DEBUG_BUILTIN)
15834 fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
15835 fn_code, fn_name1, fn_name2);
15842 /* Expand an expression EXP that calls a built-in function,
15843 with result going to TARGET if that's convenient
15844 (and in mode MODE if that's convenient).
15845 SUBTARGET may be used as the target for computing one of EXP's operands.
15846 IGNORE is nonzero if the value is to be ignored. */
15849 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15850 machine_mode mode ATTRIBUTE_UNUSED,
15851 int ignore ATTRIBUTE_UNUSED)
15853 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15854 enum rs6000_builtins fcode
15855 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
15856 size_t uns_fcode = (size_t)fcode;
15857 const struct builtin_description *d;
15861 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
15862 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
15863 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
15865 /* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
15866 floating point type, depending on whether long double is the IBM extended
15867 double (KFmode) or long double is IEEE 128-bit (TFmode). It is simpler if
15868 we only define one variant of the built-in function, and switch the code
15869 when defining it, rather than defining two built-ins and using the
15870 overload table in rs6000-c.c to switch between the two. If we don't have
15871 the proper assembler, don't do this switch because CODE_FOR_*kf* and
15872 CODE_FOR_*tf* will be CODE_FOR_nothing. */
15873 if (FLOAT128_IEEE_P (TFmode))
15874 switch (icode)
15875 {
15876 default:
15877 break;
15879 case CODE_FOR_sqrtkf2_odd: icode = CODE_FOR_sqrttf2_odd; break;
15880 case CODE_FOR_trunckfdf2_odd: icode = CODE_FOR_trunctfdf2_odd; break;
15881 case CODE_FOR_addkf3_odd: icode = CODE_FOR_addtf3_odd; break;
15882 case CODE_FOR_subkf3_odd: icode = CODE_FOR_subtf3_odd; break;
15883 case CODE_FOR_mulkf3_odd: icode = CODE_FOR_multf3_odd; break;
15884 case CODE_FOR_divkf3_odd: icode = CODE_FOR_divtf3_odd; break;
15885 case CODE_FOR_fmakf4_odd: icode = CODE_FOR_fmatf4_odd; break;
15886 case CODE_FOR_xsxexpqp_kf: icode = CODE_FOR_xsxexpqp_tf; break;
15887 case CODE_FOR_xsxsigqp_kf: icode = CODE_FOR_xsxsigqp_tf; break;
15888 case CODE_FOR_xststdcnegqp_kf: icode = CODE_FOR_xststdcnegqp_tf; break;
15889 case CODE_FOR_xsiexpqp_kf: icode = CODE_FOR_xsiexpqp_tf; break;
15890 case CODE_FOR_xsiexpqpf_kf: icode = CODE_FOR_xsiexpqpf_tf; break;
15891 case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
15892 }
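/* Illustrative example (editor's sketch): with -mabi=ieeelongdouble, where
   long double is IEEE 128-bit and therefore TFmode, a built-in such as
   __builtin_sqrtf128_round_to_odd is remapped above from the KFmode
   pattern (CODE_FOR_sqrtkf2_odd) to the TFmode one (CODE_FOR_sqrttf2_odd),
   so a single built-in definition serves both long double ABIs.  */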
15894 if (TARGET_DEBUG_BUILTIN)
15895 {
15896 const char *name1 = rs6000_builtin_info[uns_fcode].name;
15897 const char *name2 = (icode != CODE_FOR_nothing)
15898 ? get_insn_name ((int) icode)
15899 : "nothing";
15900 const char *name3;
15902 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
15903 {
15904 default: name3 = "unknown"; break;
15905 case RS6000_BTC_SPECIAL: name3 = "special"; break;
15906 case RS6000_BTC_UNARY: name3 = "unary"; break;
15907 case RS6000_BTC_BINARY: name3 = "binary"; break;
15908 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
15909 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
15910 case RS6000_BTC_ABS: name3 = "abs"; break;
15911 case RS6000_BTC_DST: name3 = "dst"; break;
15916 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
15917 (name1) ? name1 : "---", fcode,
15918 (name2) ? name2 : "---", (int) icode,
15919 name3,
15920 func_valid_p ? "" : ", not valid");
15921 }
15923 if (!func_valid_p)
15924 {
15925 rs6000_invalid_builtin (fcode);
15927 /* Given it is invalid, just generate a normal call. */
15928 return expand_call (exp, target, ignore);
15929 }
15931 switch (fcode)
15932 {
15933 case RS6000_BUILTIN_RECIP:
15934 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
15936 case RS6000_BUILTIN_RECIPF:
15937 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
15939 case RS6000_BUILTIN_RSQRTF:
15940 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
15942 case RS6000_BUILTIN_RSQRT:
15943 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
15945 case POWER7_BUILTIN_BPERMD:
15946 return rs6000_expand_binop_builtin (((TARGET_64BIT)
15947 ? CODE_FOR_bpermd_di
15948 : CODE_FOR_bpermd_si), exp, target);
15950 case RS6000_BUILTIN_GET_TB:
15951 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
15952 target);
15954 case RS6000_BUILTIN_MFTB:
15955 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
15956 ? CODE_FOR_rs6000_mftb_di
15957 : CODE_FOR_rs6000_mftb_si),
15958 target);
15960 case RS6000_BUILTIN_MFFS:
15961 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
15963 case RS6000_BUILTIN_MTFSB0:
15964 return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
15966 case RS6000_BUILTIN_MTFSB1:
15967 return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
15969 case RS6000_BUILTIN_SET_FPSCR_RN:
15970 return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
15971 exp);
15973 case RS6000_BUILTIN_SET_FPSCR_DRN:
15974 return
15975 rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
15976 exp);
15978 case RS6000_BUILTIN_MFFSL:
15979 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
15981 case RS6000_BUILTIN_MTFSF:
15982 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
15984 case RS6000_BUILTIN_CPU_INIT:
15985 case RS6000_BUILTIN_CPU_IS:
15986 case RS6000_BUILTIN_CPU_SUPPORTS:
15987 return cpu_expand_builtin (fcode, exp, target);
15989 case MISC_BUILTIN_SPEC_BARRIER:
15990 {
15991 emit_insn (gen_speculation_barrier ());
15992 return NULL_RTX;
15993 }
15995 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
15996 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
15997 {
15998 int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
15999 : (int) CODE_FOR_altivec_lvsl_direct);
16000 machine_mode tmode = insn_data[icode2].operand[0].mode;
16001 machine_mode mode = insn_data[icode2].operand[1].mode;
16002 tree arg;
16003 rtx op, addr, pat;
16005 gcc_assert (TARGET_ALTIVEC);
16007 arg = CALL_EXPR_ARG (exp, 0);
16008 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16009 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16010 addr = memory_address (mode, op);
16011 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16012 op = addr;
16013 else
16014 {
16015 /* For the load case we need to negate the address. */
16016 op = gen_reg_rtx (GET_MODE (addr));
16017 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16018 }
16019 op = gen_rtx_MEM (mode, op);
16021 if (target == 0
16022 || GET_MODE (target) != tmode
16023 || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
16024 target = gen_reg_rtx (tmode);
16026 pat = GEN_FCN (icode2) (target, op);
16027 if (! pat)
16028 return 0;
16029 emit_insn (pat);
16031 return target;
16032 }
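/* Illustrative sketch (editorial): on a little-endian target the load
   case above therefore expands roughly to
     op = -addr;
     mask = lvsl 0,op      (CODE_FOR_altivec_lvsl_direct)
   i.e. an lvsl on the negated address, which yields the permute mask the
   vectorizer uses to realign a misaligned load; big-endian targets use
   lvsr on the unnegated address instead.  */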
16034 case ALTIVEC_BUILTIN_VCFUX:
16035 case ALTIVEC_BUILTIN_VCFSX:
16036 case ALTIVEC_BUILTIN_VCTUXS:
16037 case ALTIVEC_BUILTIN_VCTSXS:
16038 /* FIXME: There's got to be a nicer way to handle this case than
16039 constructing a new CALL_EXPR. */
16040 if (call_expr_nargs (exp) == 1)
16041 {
16042 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16043 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16044 }
16045 break;
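/* Illustrative example (editorial): a one-argument vec_ctf (v) is
   rewritten here exactly as if the user had called vec_ctf (v, 0),
   i.e. with a scale factor of 2**0 == 1.  */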
16047 /* For the pack and unpack int128 routines, fix up the builtin so it
16048 uses the correct IBM128 type. */
16049 case MISC_BUILTIN_PACK_IF:
16050 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16051 {
16052 icode = CODE_FOR_packtf;
16053 fcode = MISC_BUILTIN_PACK_TF;
16054 uns_fcode = (size_t)fcode;
16055 }
16056 break;
16058 case MISC_BUILTIN_UNPACK_IF:
16059 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16060 {
16061 icode = CODE_FOR_unpacktf;
16062 fcode = MISC_BUILTIN_UNPACK_TF;
16063 uns_fcode = (size_t)fcode;
16064 }
16065 break;
16067 default:
16068 break;
16069 }
16071 if (TARGET_ALTIVEC)
16072 {
16073 ret = altivec_expand_builtin (exp, target, &success);
16075 if (success)
16076 return ret;
16077 }
16078 if (TARGET_HTM)
16079 {
16080 ret = htm_expand_builtin (exp, target, &success);
16082 if (success)
16083 return ret;
16084 }
16086 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16087 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16088 gcc_assert (attr == RS6000_BTC_UNARY
16089 || attr == RS6000_BTC_BINARY
16090 || attr == RS6000_BTC_TERNARY
16091 || attr == RS6000_BTC_SPECIAL);
16093 /* Handle simple unary operations. */
16094 d = bdesc_1arg;
16095 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16096 if (d->code == fcode)
16097 return rs6000_expand_unop_builtin (icode, exp, target);
16099 /* Handle simple binary operations. */
16100 d = bdesc_2arg;
16101 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16102 if (d->code == fcode)
16103 return rs6000_expand_binop_builtin (icode, exp, target);
16105 /* Handle simple ternary operations. */
16106 d = bdesc_3arg;
16107 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16108 if (d->code == fcode)
16109 return rs6000_expand_ternop_builtin (icode, exp, target);
16111 /* Handle simple no-argument operations. */
16112 d = bdesc_0arg;
16113 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16114 if (d->code == fcode)
16115 return rs6000_expand_zeroop_builtin (icode, target);
16117 gcc_unreachable ();
16118 }
16120 /* Create a builtin vector type with a name, taking care not to give
16121 the canonical type a name. */
16123 static tree
16124 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
16125 {
16126 tree result = build_vector_type (elt_type, num_elts);
16128 /* Copy so we don't give the canonical type a name. */
16129 result = build_variant_type_copy (result);
16131 add_builtin_type (name, result);
16133 return result;
16134 }
16136 static void
16137 rs6000_init_builtins (void)
16138 {
16139 tree tdecl;
16140 tree ftype;
16141 machine_mode mode;
16143 if (TARGET_DEBUG_BUILTIN)
16144 fprintf (stderr, "rs6000_init_builtins%s%s\n",
16145 (TARGET_ALTIVEC) ? ", altivec" : "",
16146 (TARGET_VSX) ? ", vsx" : "");
16148 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
16149 : "__vector long long",
16150 intDI_type_node, 2);
16151 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
16152 V4SI_type_node = rs6000_vector_type ("__vector signed int",
16153 intSI_type_node, 4);
16154 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
16155 V8HI_type_node = rs6000_vector_type ("__vector signed short",
16156 intHI_type_node, 8);
16157 V16QI_type_node = rs6000_vector_type ("__vector signed char",
16158 intQI_type_node, 16);
16160 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
16161 unsigned_intQI_type_node, 16);
16162 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
16163 unsigned_intHI_type_node, 8);
16164 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
16165 unsigned_intSI_type_node, 4);
16166 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16167 ? "__vector unsigned long"
16168 : "__vector unsigned long long",
16169 unsigned_intDI_type_node, 2);
16171 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16173 const_str_type_node
16174 = build_pointer_type (build_qualified_type (char_type_node,
16175 TYPE_QUAL_CONST));
16177 /* We use V1TI mode as a special container to hold __int128_t items that
16178 must live in VSX registers. */
16179 if (intTI_type_node)
16180 {
16181 V1TI_type_node = rs6000_vector_type ("__vector __int128",
16182 intTI_type_node, 1);
16183 unsigned_V1TI_type_node
16184 = rs6000_vector_type ("__vector unsigned __int128",
16185 unsigned_intTI_type_node, 1);
16186 }
16188 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16189 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16190 'vector unsigned short'. */
16192 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16193 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16194 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16195 bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16196 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
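/* Illustrative example (editorial): because these are distinct type
   copies, C++ can overload on them,
     void f (vector bool int);
     void f (vector unsigned int);
   and the two declarations mangle differently even though both types
   share the same mode and layout.  */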
16198 long_integer_type_internal_node = long_integer_type_node;
16199 long_unsigned_type_internal_node = long_unsigned_type_node;
16200 long_long_integer_type_internal_node = long_long_integer_type_node;
16201 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16202 intQI_type_internal_node = intQI_type_node;
16203 uintQI_type_internal_node = unsigned_intQI_type_node;
16204 intHI_type_internal_node = intHI_type_node;
16205 uintHI_type_internal_node = unsigned_intHI_type_node;
16206 intSI_type_internal_node = intSI_type_node;
16207 uintSI_type_internal_node = unsigned_intSI_type_node;
16208 intDI_type_internal_node = intDI_type_node;
16209 uintDI_type_internal_node = unsigned_intDI_type_node;
16210 intTI_type_internal_node = intTI_type_node;
16211 uintTI_type_internal_node = unsigned_intTI_type_node;
16212 float_type_internal_node = float_type_node;
16213 double_type_internal_node = double_type_node;
16214 long_double_type_internal_node = long_double_type_node;
16215 dfloat64_type_internal_node = dfloat64_type_node;
16216 dfloat128_type_internal_node = dfloat128_type_node;
16217 void_type_internal_node = void_type_node;
16219 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16220 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16221 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16222 format that uses a pair of doubles, depending on the switches and
16223 defaults.
16225 If we don't support either 128-bit IBM double-double or IEEE 128-bit
16226 floating point, we need to make sure the type is non-zero or else the
16227 self-test fails during bootstrap.
16229 Always create __ibm128 as a separate type, even if the current long double
16230 format is IBM extended double.
16232 For IEEE 128-bit floating point, always create the type __ieee128. If the
16233 user used -mfloat128, rs6000-c.c will create a define from __float128 to
16234 __ieee128. */
16235 if (TARGET_FLOAT128_TYPE)
16236 {
16237 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16238 ibm128_float_type_node = long_double_type_node;
16239 else
16240 {
16241 ibm128_float_type_node = make_node (REAL_TYPE);
16242 TYPE_PRECISION (ibm128_float_type_node) = 128;
16243 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16244 layout_type (ibm128_float_type_node);
16245 }
16247 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16248 "__ibm128");
16250 if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16251 ieee128_float_type_node = long_double_type_node;
16252 else
16253 ieee128_float_type_node = float128_type_node;
16255 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16256 "__ieee128");
16257 }
16259 else
16260 ieee128_float_type_node = ibm128_float_type_node = long_double_type_node;
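/* Editorial summary of the mapping above (assuming -mlong-double-128):
   with -mabi=ibmlongdouble, __ibm128 is long double itself and __ieee128
   is the distinct __float128 (KFmode) type; with -mabi=ieeelongdouble the
   roles flip, long double being the IEEE 128-bit type and __ibm128 a
   separately laid out IFmode type.  Without -mfloat128-type, both names
   fall back to long double.  */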
16262 /* Initialize the modes for builtin_function_type, mapping a machine mode to
16263 tree type node. */
16264 builtin_mode_to_type[QImode][0] = integer_type_node;
16265 builtin_mode_to_type[HImode][0] = integer_type_node;
16266 builtin_mode_to_type[SImode][0] = intSI_type_node;
16267 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16268 builtin_mode_to_type[DImode][0] = intDI_type_node;
16269 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16270 builtin_mode_to_type[TImode][0] = intTI_type_node;
16271 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16272 builtin_mode_to_type[SFmode][0] = float_type_node;
16273 builtin_mode_to_type[DFmode][0] = double_type_node;
16274 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16275 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16276 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16277 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16278 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16279 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16280 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16281 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16282 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16283 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16284 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16285 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16286 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16287 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16288 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16289 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16290 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
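/* Illustrative example (editorial): builtin_function_type below reads
   this table in both signednesses, e.g.
     builtin_mode_to_type[V4SImode][0]   -> "__vector signed int"
     builtin_mode_to_type[V4SImode][1]   -> "__vector unsigned int"
   so a machine mode plus an unsigned flag is enough to pick the tree
   type for a built-in's prototype.  */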
16292 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16293 TYPE_NAME (bool_char_type_node) = tdecl;
16295 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16296 TYPE_NAME (bool_short_type_node) = tdecl;
16298 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16299 TYPE_NAME (bool_int_type_node) = tdecl;
16301 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16302 TYPE_NAME (pixel_type_node) = tdecl;
16304 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
16305 bool_char_type_node, 16);
16306 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
16307 bool_short_type_node, 8);
16308 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
16309 bool_int_type_node, 4);
16310 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16311 ? "__vector __bool long"
16312 : "__vector __bool long long",
16313 bool_long_long_type_node, 2);
16314 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
16315 pixel_type_node, 8);
16317 /* Create Altivec and VSX builtins on machines with at least the
16318 general purpose extensions (970 and newer) to allow the use of
16319 the target attribute. */
16320 if (TARGET_EXTRA_BUILTINS)
16321 altivec_init_builtins ();
16323 htm_init_builtins ();
16325 if (TARGET_EXTRA_BUILTINS)
16326 rs6000_common_init_builtins ();
16328 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16329 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16330 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16332 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16333 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16334 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16336 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16337 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16338 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16340 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16341 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16342 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16344 mode = (TARGET_64BIT) ? DImode : SImode;
16345 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16346 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16347 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16349 ftype = build_function_type_list (unsigned_intDI_type_node,
16350 NULL_TREE);
16351 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16353 if (TARGET_64BIT)
16354 ftype = build_function_type_list (unsigned_intDI_type_node,
16355 NULL_TREE);
16356 else
16357 ftype = build_function_type_list (unsigned_intSI_type_node,
16358 NULL_TREE);
16359 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
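/* Illustrative usage (editorial sketch of user code; do_work is a
   placeholder):
     unsigned long long t0 = __builtin_ppc_get_timebase ();
     do_work ();
     unsigned long long delta = __builtin_ppc_get_timebase () - t0;
   __builtin_ppc_mftb reads the same time base but returns only 32 bits
   on 32-bit targets, matching the SImode expander chosen above.  */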
16361 ftype = build_function_type_list (double_type_node, NULL_TREE);
16362 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16364 ftype = build_function_type_list (double_type_node, NULL_TREE);
16365 def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL);
16367 ftype = build_function_type_list (void_type_node,
16368 intSI_type_node,
16369 NULL_TREE);
16370 def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0);
16372 ftype = build_function_type_list (void_type_node,
16373 intSI_type_node,
16374 NULL_TREE);
16375 def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1);
16377 ftype = build_function_type_list (void_type_node,
16378 intSI_type_node,
16379 NULL_TREE);
16380 def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN);
16382 ftype = build_function_type_list (void_type_node,
16383 intSI_type_node,
16384 NULL_TREE);
16385 def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN);
16387 ftype = build_function_type_list (void_type_node,
16388 intSI_type_node, double_type_node,
16389 NULL_TREE);
16390 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16392 ftype = build_function_type_list (void_type_node, NULL_TREE);
16393 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16394 def_builtin ("__builtin_ppc_speculation_barrier", ftype,
16395 MISC_BUILTIN_SPEC_BARRIER);
16397 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16398 NULL_TREE);
16399 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16400 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
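/* Illustrative usage (editorial sketch; the dispatch requires runtime
   support in glibc, and the use_* functions are placeholders):
     if (__builtin_cpu_supports ("arch_3_00"))
       use_power9_path ();
     else if (__builtin_cpu_is ("power8"))
       use_power8_path ();  */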
16402 /* AIX libm provides clog as __clog. */
16403 if (TARGET_XCOFF &&
16404 (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16405 set_user_assembler_name (tdecl, "__clog");
16407 #ifdef SUBTARGET_INIT_BUILTINS
16408 SUBTARGET_INIT_BUILTINS;
16409 #endif
16410 }
16412 /* Returns the rs6000 builtin decl for CODE. */
16414 static tree
16415 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16416 {
16417 HOST_WIDE_INT fnmask;
16419 if (code >= RS6000_BUILTIN_COUNT)
16420 return error_mark_node;
16422 fnmask = rs6000_builtin_info[code].mask;
16423 if ((fnmask & rs6000_builtin_mask) != fnmask)
16424 {
16425 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16426 return error_mark_node;
16427 }
16429 return rs6000_builtin_decls[code];
16430 }
16432 static void
16433 altivec_init_builtins (void)
16434 {
16435 const struct builtin_description *d;
16436 size_t i;
16437 tree ftype;
16438 tree decl;
16439 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16441 tree pvoid_type_node = build_pointer_type (void_type_node);
16443 tree pcvoid_type_node
16444 = build_pointer_type (build_qualified_type (void_type_node,
16445 TYPE_QUAL_CONST));
16447 tree int_ftype_opaque
16448 = build_function_type_list (integer_type_node,
16449 opaque_V4SI_type_node, NULL_TREE);
16450 tree opaque_ftype_opaque
16451 = build_function_type_list (integer_type_node, NULL_TREE);
16452 tree opaque_ftype_opaque_int
16453 = build_function_type_list (opaque_V4SI_type_node,
16454 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
16455 tree opaque_ftype_opaque_opaque_int
16456 = build_function_type_list (opaque_V4SI_type_node,
16457 opaque_V4SI_type_node, opaque_V4SI_type_node,
16458 integer_type_node, NULL_TREE);
16459 tree opaque_ftype_opaque_opaque_opaque
16460 = build_function_type_list (opaque_V4SI_type_node,
16461 opaque_V4SI_type_node, opaque_V4SI_type_node,
16462 opaque_V4SI_type_node, NULL_TREE);
16463 tree opaque_ftype_opaque_opaque
16464 = build_function_type_list (opaque_V4SI_type_node,
16465 opaque_V4SI_type_node, opaque_V4SI_type_node,
16466 NULL_TREE);
16467 tree int_ftype_int_opaque_opaque
16468 = build_function_type_list (integer_type_node,
16469 integer_type_node, opaque_V4SI_type_node,
16470 opaque_V4SI_type_node, NULL_TREE);
16471 tree int_ftype_int_v4si_v4si
16472 = build_function_type_list (integer_type_node,
16473 integer_type_node, V4SI_type_node,
16474 V4SI_type_node, NULL_TREE);
16475 tree int_ftype_int_v2di_v2di
16476 = build_function_type_list (integer_type_node,
16477 integer_type_node, V2DI_type_node,
16478 V2DI_type_node, NULL_TREE);
16479 tree void_ftype_v4si
16480 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
16481 tree v8hi_ftype_void
16482 = build_function_type_list (V8HI_type_node, NULL_TREE);
16483 tree void_ftype_void
16484 = build_function_type_list (void_type_node, NULL_TREE);
16485 tree void_ftype_int
16486 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16488 tree opaque_ftype_long_pcvoid
16489 = build_function_type_list (opaque_V4SI_type_node,
16490 long_integer_type_node, pcvoid_type_node,
16491 NULL_TREE);
16492 tree v16qi_ftype_long_pcvoid
16493 = build_function_type_list (V16QI_type_node,
16494 long_integer_type_node, pcvoid_type_node,
16495 NULL_TREE);
16496 tree v8hi_ftype_long_pcvoid
16497 = build_function_type_list (V8HI_type_node,
16498 long_integer_type_node, pcvoid_type_node,
16499 NULL_TREE);
16500 tree v4si_ftype_long_pcvoid
16501 = build_function_type_list (V4SI_type_node,
16502 long_integer_type_node, pcvoid_type_node,
16503 NULL_TREE);
16504 tree v4sf_ftype_long_pcvoid
16505 = build_function_type_list (V4SF_type_node,
16506 long_integer_type_node, pcvoid_type_node,
16507 NULL_TREE);
16508 tree v2df_ftype_long_pcvoid
16509 = build_function_type_list (V2DF_type_node,
16510 long_integer_type_node, pcvoid_type_node,
16511 NULL_TREE);
16512 tree v2di_ftype_long_pcvoid
16513 = build_function_type_list (V2DI_type_node,
16514 long_integer_type_node, pcvoid_type_node,
16515 NULL_TREE);
16516 tree v1ti_ftype_long_pcvoid
16517 = build_function_type_list (V1TI_type_node,
16518 long_integer_type_node, pcvoid_type_node,
16519 NULL_TREE);
16521 tree void_ftype_opaque_long_pvoid
16522 = build_function_type_list (void_type_node,
16523 opaque_V4SI_type_node, long_integer_type_node,
16524 pvoid_type_node, NULL_TREE);
16525 tree void_ftype_v4si_long_pvoid
16526 = build_function_type_list (void_type_node,
16527 V4SI_type_node, long_integer_type_node,
16528 pvoid_type_node, NULL_TREE);
16529 tree void_ftype_v16qi_long_pvoid
16530 = build_function_type_list (void_type_node,
16531 V16QI_type_node, long_integer_type_node,
16532 pvoid_type_node, NULL_TREE);
16534 tree void_ftype_v16qi_pvoid_long
16535 = build_function_type_list (void_type_node,
16536 V16QI_type_node, pvoid_type_node,
16537 long_integer_type_node, NULL_TREE);
16539 tree void_ftype_v8hi_long_pvoid
16540 = build_function_type_list (void_type_node,
16541 V8HI_type_node, long_integer_type_node,
16542 pvoid_type_node, NULL_TREE);
16543 tree void_ftype_v4sf_long_pvoid
16544 = build_function_type_list (void_type_node,
16545 V4SF_type_node, long_integer_type_node,
16546 pvoid_type_node, NULL_TREE);
16547 tree void_ftype_v2df_long_pvoid
16548 = build_function_type_list (void_type_node,
16549 V2DF_type_node, long_integer_type_node,
16550 pvoid_type_node, NULL_TREE);
16551 tree void_ftype_v1ti_long_pvoid
16552 = build_function_type_list (void_type_node,
16553 V1TI_type_node, long_integer_type_node,
16554 pvoid_type_node, NULL_TREE);
16555 tree void_ftype_v2di_long_pvoid
16556 = build_function_type_list (void_type_node,
16557 V2DI_type_node, long_integer_type_node,
16558 pvoid_type_node, NULL_TREE);
16559 tree int_ftype_int_v8hi_v8hi
16560 = build_function_type_list (integer_type_node,
16561 integer_type_node, V8HI_type_node,
16562 V8HI_type_node, NULL_TREE);
16563 tree int_ftype_int_v16qi_v16qi
16564 = build_function_type_list (integer_type_node,
16565 integer_type_node, V16QI_type_node,
16566 V16QI_type_node, NULL_TREE);
16567 tree int_ftype_int_v4sf_v4sf
16568 = build_function_type_list (integer_type_node,
16569 integer_type_node, V4SF_type_node,
16570 V4SF_type_node, NULL_TREE);
16571 tree int_ftype_int_v2df_v2df
16572 = build_function_type_list (integer_type_node,
16573 integer_type_node, V2DF_type_node,
16574 V2DF_type_node, NULL_TREE);
16575 tree v2di_ftype_v2di
16576 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
16577 tree v4si_ftype_v4si
16578 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16579 tree v8hi_ftype_v8hi
16580 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16581 tree v16qi_ftype_v16qi
16582 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16583 tree v4sf_ftype_v4sf
16584 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16585 tree v2df_ftype_v2df
16586 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16587 tree void_ftype_pcvoid_int_int
16588 = build_function_type_list (void_type_node,
16589 pcvoid_type_node, integer_type_node,
16590 integer_type_node, NULL_TREE);
16592 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
16593 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
16594 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
16595 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
16596 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
16597 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
16598 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
16599 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
16600 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
16601 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
16602 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
16603 ALTIVEC_BUILTIN_LVXL_V2DF);
16604 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
16605 ALTIVEC_BUILTIN_LVXL_V2DI);
16606 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
16607 ALTIVEC_BUILTIN_LVXL_V4SF);
16608 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
16609 ALTIVEC_BUILTIN_LVXL_V4SI);
16610 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
16611 ALTIVEC_BUILTIN_LVXL_V8HI);
16612 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
16613 ALTIVEC_BUILTIN_LVXL_V16QI);
16614 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
16615 def_builtin ("__builtin_altivec_lvx_v1ti", v1ti_ftype_long_pcvoid,
16616 ALTIVEC_BUILTIN_LVX_V1TI);
16617 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
16618 ALTIVEC_BUILTIN_LVX_V2DF);
16619 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
16620 ALTIVEC_BUILTIN_LVX_V2DI);
16621 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
16622 ALTIVEC_BUILTIN_LVX_V4SF);
16623 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
16624 ALTIVEC_BUILTIN_LVX_V4SI);
16625 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
16626 ALTIVEC_BUILTIN_LVX_V8HI);
16627 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
16628 ALTIVEC_BUILTIN_LVX_V16QI);
16629 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
16630 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
16631 ALTIVEC_BUILTIN_STVX_V2DF);
16632 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
16633 ALTIVEC_BUILTIN_STVX_V2DI);
16634 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
16635 ALTIVEC_BUILTIN_STVX_V4SF);
16636 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
16637 ALTIVEC_BUILTIN_STVX_V4SI);
16638 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
16639 ALTIVEC_BUILTIN_STVX_V8HI);
16640 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
16641 ALTIVEC_BUILTIN_STVX_V16QI);
16642 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
16643 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
16644 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
16645 ALTIVEC_BUILTIN_STVXL_V2DF);
16646 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
16647 ALTIVEC_BUILTIN_STVXL_V2DI);
16648 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
16649 ALTIVEC_BUILTIN_STVXL_V4SF);
16650 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
16651 ALTIVEC_BUILTIN_STVXL_V4SI);
16652 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
16653 ALTIVEC_BUILTIN_STVXL_V8HI);
16654 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
16655 ALTIVEC_BUILTIN_STVXL_V16QI);
16656 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
16657 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
16658 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
16659 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
16660 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
16661 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
16662 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
16663 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
16664 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
16665 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
16666 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
16667 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
16668 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
16669 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
16670 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
16671 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
16673 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
16674 VSX_BUILTIN_LXVD2X_V2DF);
16675 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
16676 VSX_BUILTIN_LXVD2X_V2DI);
16677 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
16678 VSX_BUILTIN_LXVW4X_V4SF);
16679 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
16680 VSX_BUILTIN_LXVW4X_V4SI);
16681 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
16682 VSX_BUILTIN_LXVW4X_V8HI);
16683 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
16684 VSX_BUILTIN_LXVW4X_V16QI);
16685 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
16686 VSX_BUILTIN_STXVD2X_V2DF);
16687 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
16688 VSX_BUILTIN_STXVD2X_V2DI);
16689 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
16690 VSX_BUILTIN_STXVW4X_V4SF);
16691 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
16692 VSX_BUILTIN_STXVW4X_V4SI);
16693 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
16694 VSX_BUILTIN_STXVW4X_V8HI);
16695 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
16696 VSX_BUILTIN_STXVW4X_V16QI);
16698 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
16699 VSX_BUILTIN_LD_ELEMREV_V2DF);
16700 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
16701 VSX_BUILTIN_LD_ELEMREV_V2DI);
16702 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
16703 VSX_BUILTIN_LD_ELEMREV_V4SF);
16704 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
16705 VSX_BUILTIN_LD_ELEMREV_V4SI);
16706 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
16707 VSX_BUILTIN_LD_ELEMREV_V8HI);
16708 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
16709 VSX_BUILTIN_LD_ELEMREV_V16QI);
16710 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
16711 VSX_BUILTIN_ST_ELEMREV_V2DF);
16712 def_builtin ("__builtin_vsx_st_elemrev_v1ti", void_ftype_v1ti_long_pvoid,
16713 VSX_BUILTIN_ST_ELEMREV_V1TI);
16714 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
16715 VSX_BUILTIN_ST_ELEMREV_V2DI);
16716 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
16717 VSX_BUILTIN_ST_ELEMREV_V4SF);
16718 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
16719 VSX_BUILTIN_ST_ELEMREV_V4SI);
16720 def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid,
16721 VSX_BUILTIN_ST_ELEMREV_V8HI);
16722 def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid,
16723 VSX_BUILTIN_ST_ELEMREV_V16QI);
16725 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
16726 VSX_BUILTIN_VEC_LD);
16727 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
16728 VSX_BUILTIN_VEC_ST);
16729 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
16730 VSX_BUILTIN_VEC_XL);
16731 def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
16732 VSX_BUILTIN_VEC_XL_BE);
16733 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
16734 VSX_BUILTIN_VEC_XST);
16735 def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid,
16736 VSX_BUILTIN_VEC_XST_BE);
16738 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
16739 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
16740 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
16742 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
16743 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
16744 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
16745 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
16746 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
16747 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
16748 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
16749 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
16750 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
16751 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
16752 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
16753 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
16755 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
16756 ALTIVEC_BUILTIN_VEC_ADDE);
16757 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
16758 ALTIVEC_BUILTIN_VEC_ADDEC);
16759 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
16760 ALTIVEC_BUILTIN_VEC_CMPNE);
16761 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
16762 ALTIVEC_BUILTIN_VEC_MUL);
16763 def_builtin ("__builtin_vec_sube", opaque_ftype_opaque_opaque_opaque,
16764 ALTIVEC_BUILTIN_VEC_SUBE);
16765 def_builtin ("__builtin_vec_subec", opaque_ftype_opaque_opaque_opaque,
16766 ALTIVEC_BUILTIN_VEC_SUBEC);
16768 /* Cell builtins. */
16769 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
16770 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
16771 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
16772 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
16774 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
16775 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
16776 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
16777 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
16779 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
16780 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
16781 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
16782 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
16784 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
16785 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
16786 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
16787 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
16789 if (TARGET_P9_VECTOR)
16790 {
16791 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
16792 P9V_BUILTIN_STXVL);
16793 def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
16794 P9V_BUILTIN_XST_LEN_R);
16795 }
16797 /* Add the DST variants. */
16798 d = bdesc_dst;
16799 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16800 {
16801 HOST_WIDE_INT mask = d->mask;
16803 /* It is expected that these dst built-in functions may have
16804 d->icode equal to CODE_FOR_nothing. */
16805 if ((mask & builtin_mask) != mask)
16806 {
16807 if (TARGET_DEBUG_BUILTIN)
16808 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
16809 d->name);
16810 continue;
16811 }
16812 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
16813 }
16815 /* Initialize the predicates. */
16816 d = bdesc_altivec_preds;
16817 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16818 {
16819 machine_mode mode1;
16820 tree type;
16821 HOST_WIDE_INT mask = d->mask;
16823 if ((mask & builtin_mask) != mask)
16824 {
16825 if (TARGET_DEBUG_BUILTIN)
16826 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
16827 d->name);
16828 continue;
16829 }
16831 if (rs6000_overloaded_builtin_p (d->code))
16832 mode1 = VOIDmode;
16833 else
16834 {
16835 /* Cannot define builtin if the instruction is disabled. */
16836 gcc_assert (d->icode != CODE_FOR_nothing);
16837 mode1 = insn_data[d->icode].operand[1].mode;
16838 }
16840 switch (mode1)
16841 {
16842 case E_VOIDmode:
16843 type = int_ftype_int_opaque_opaque;
16844 break;
16845 case E_V2DImode:
16846 type = int_ftype_int_v2di_v2di;
16847 break;
16848 case E_V4SImode:
16849 type = int_ftype_int_v4si_v4si;
16850 break;
16851 case E_V8HImode:
16852 type = int_ftype_int_v8hi_v8hi;
16853 break;
16854 case E_V16QImode:
16855 type = int_ftype_int_v16qi_v16qi;
16856 break;
16857 case E_V4SFmode:
16858 type = int_ftype_int_v4sf_v4sf;
16859 break;
16860 case E_V2DFmode:
16861 type = int_ftype_int_v2df_v2df;
16862 break;
16863 default:
16864 gcc_unreachable ();
16865 }
16867 def_builtin (d->name, type, d->code);
16868 }
16870 /* Initialize the abs* operators. */
16871 d = bdesc_abs;
16872 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16873 {
16874 machine_mode mode0;
16875 tree type;
16876 HOST_WIDE_INT mask = d->mask;
16878 if ((mask & builtin_mask) != mask)
16879 {
16880 if (TARGET_DEBUG_BUILTIN)
16881 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
16882 d->name);
16883 continue;
16884 }
16886 /* Cannot define builtin if the instruction is disabled. */
16887 gcc_assert (d->icode != CODE_FOR_nothing);
16888 mode0 = insn_data[d->icode].operand[0].mode;
16890 switch (mode0)
16891 {
16892 case E_V2DImode:
16893 type = v2di_ftype_v2di;
16894 break;
16895 case E_V4SImode:
16896 type = v4si_ftype_v4si;
16897 break;
16898 case E_V8HImode:
16899 type = v8hi_ftype_v8hi;
16900 break;
16901 case E_V16QImode:
16902 type = v16qi_ftype_v16qi;
16903 break;
16904 case E_V4SFmode:
16905 type = v4sf_ftype_v4sf;
16906 break;
16907 case E_V2DFmode:
16908 type = v2df_ftype_v2df;
16909 break;
16910 default:
16911 gcc_unreachable ();
16912 }
16914 def_builtin (d->name, type, d->code);
16915 }
16917 /* Initialize target builtin that implements
16918 targetm.vectorize.builtin_mask_for_load. */
16920 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
16921 v16qi_ftype_long_pcvoid,
16922 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
16923 BUILT_IN_MD, NULL, NULL_TREE);
16924 TREE_READONLY (decl) = 1;
16925 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
16926 altivec_builtin_mask_for_load = decl;
16928 /* Access to the vec_init patterns. */
16929 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
16930 integer_type_node, integer_type_node,
16931 integer_type_node, NULL_TREE);
16932 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
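/* Illustrative example (editorial): a front end can build a V4SI
   constructor through the built-in defined above, e.g.
     vector signed int v = __builtin_vec_init_v4si (1, 2, 3, 4);
   which is equivalent to the vector literal
   (vector signed int) {1, 2, 3, 4}.  */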
16934 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
16935 short_integer_type_node,
16936 short_integer_type_node,
16937 short_integer_type_node,
16938 short_integer_type_node,
16939 short_integer_type_node,
16940 short_integer_type_node,
16941 short_integer_type_node, NULL_TREE);
16942 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
16944 ftype = build_function_type_list (V16QI_type_node, char_type_node,
16945 char_type_node, char_type_node,
16946 char_type_node, char_type_node,
16947 char_type_node, char_type_node,
16948 char_type_node, char_type_node,
16949 char_type_node, char_type_node,
16950 char_type_node, char_type_node,
16951 char_type_node, char_type_node,
16952 char_type_node, NULL_TREE);
16953 def_builtin ("__builtin_vec_init_v16qi", ftype,
16954 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
16956 ftype = build_function_type_list (V4SF_type_node, float_type_node,
16957 float_type_node, float_type_node,
16958 float_type_node, NULL_TREE);
16959 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
16961 /* VSX builtins. */
16962 ftype = build_function_type_list (V2DF_type_node, double_type_node,
16963 double_type_node, NULL_TREE);
16964 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
16966 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
16967 intDI_type_node, NULL_TREE);
16968 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
16970 /* Access to the vec_set patterns. */
16971 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
16972 intSI_type_node,
16973 integer_type_node, NULL_TREE);
16974 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
16976 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16977 intHI_type_node,
16978 integer_type_node, NULL_TREE);
16979 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
16981 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
16982 intQI_type_node,
16983 integer_type_node, NULL_TREE);
16984 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
16986 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
16987 float_type_node,
16988 integer_type_node, NULL_TREE);
16989 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
16991 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
16992 double_type_node,
16993 integer_type_node, NULL_TREE);
16994 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
16996 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
16997 intDI_type_node,
16998 integer_type_node, NULL_TREE);
16999 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17001 /* Access to the vec_extract patterns. */
17002 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17003 integer_type_node, NULL_TREE);
17004 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17006 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17007 integer_type_node, NULL_TREE);
17008 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17010 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17011 integer_type_node, NULL_TREE);
17012 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17014 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17015 integer_type_node, NULL_TREE);
17016 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17018 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17019 integer_type_node, NULL_TREE);
17020 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17022 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17023 integer_type_node, NULL_TREE);
17024 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
17027 if (V1TI_type_node)
17028 {
17029 tree v1ti_ftype_long_pcvoid
17030 = build_function_type_list (V1TI_type_node,
17031 long_integer_type_node, pcvoid_type_node,
17032 NULL_TREE);
17033 tree void_ftype_v1ti_long_pvoid
17034 = build_function_type_list (void_type_node,
17035 V1TI_type_node, long_integer_type_node,
17036 pvoid_type_node, NULL_TREE);
17037 def_builtin ("__builtin_vsx_ld_elemrev_v1ti", v1ti_ftype_long_pcvoid,
17038 VSX_BUILTIN_LD_ELEMREV_V1TI);
17039 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17040 VSX_BUILTIN_LXVD2X_V1TI);
17041 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17042 VSX_BUILTIN_STXVD2X_V1TI);
17043 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17044 NULL_TREE, NULL_TREE);
17045 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17046 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17047 intTI_type_node,
17048 integer_type_node, NULL_TREE);
17049 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17050 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17051 integer_type_node, NULL_TREE);
17052 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17053 }
17054 }
17057 static void
17058 htm_init_builtins (void)
17059 {
17060 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17061 const struct builtin_description *d;
17062 size_t i;
17064 d = bdesc_htm;
17065 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17066 {
17067 tree op[MAX_HTM_OPERANDS], type;
17068 HOST_WIDE_INT mask = d->mask;
17069 unsigned attr = rs6000_builtin_info[d->code].attr;
17070 bool void_func = (attr & RS6000_BTC_VOID);
17071 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17072 int nopnds = 0;
17073 tree gpr_type_node;
17074 tree rettype;
17075 tree argtype;
17077 /* It is expected that these htm built-in functions may have
17078 d->icode equal to CODE_FOR_nothing. */
17080 if (TARGET_32BIT && TARGET_POWERPC64)
17081 gpr_type_node = long_long_unsigned_type_node;
17082 else
17083 gpr_type_node = long_unsigned_type_node;
17085 if (attr & RS6000_BTC_SPR)
17086 {
17087 rettype = gpr_type_node;
17088 argtype = gpr_type_node;
17089 }
17090 else if (d->code == HTM_BUILTIN_TABORTDC
17091 || d->code == HTM_BUILTIN_TABORTDCI)
17092 {
17093 rettype = unsigned_type_node;
17094 argtype = gpr_type_node;
17095 }
17096 else
17097 {
17098 rettype = unsigned_type_node;
17099 argtype = unsigned_type_node;
17100 }
17102 if ((mask & builtin_mask) != mask)
17103 {
17104 if (TARGET_DEBUG_BUILTIN)
17105 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
17106 continue;
17107 }
17109 if (d->name == 0)
17110 {
17111 if (TARGET_DEBUG_BUILTIN)
17112 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
17113 (long unsigned) i);
17114 continue;
17115 }
17117 op[nopnds++] = (void_func) ? void_type_node : rettype;
17119 if (attr_args == RS6000_BTC_UNARY)
17120 op[nopnds++] = argtype;
17121 else if (attr_args == RS6000_BTC_BINARY)
17122 {
17123 op[nopnds++] = argtype;
17124 op[nopnds++] = argtype;
17125 }
17126 else if (attr_args == RS6000_BTC_TERNARY)
17127 {
17128 op[nopnds++] = argtype;
17129 op[nopnds++] = argtype;
17130 op[nopnds++] = argtype;
17131 }
17133 switch (nopnds)
17134 {
17135 case 1:
17136 type = build_function_type_list (op[0], NULL_TREE);
17137 break;
17138 case 2:
17139 type = build_function_type_list (op[0], op[1], NULL_TREE);
17140 break;
17141 case 3:
17142 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17143 break;
17144 case 4:
17145 type = build_function_type_list (op[0], op[1], op[2], op[3],
17146 NULL_TREE);
17147 break;
17148 default:
17149 gcc_unreachable ();
17150 }
17152 def_builtin (d->name, type, d->code);
17153 }
17154 }
17156 /* Hash function for builtin functions with up to 3 arguments and a return
17157 type. */
17158 hashval_t
17159 builtin_hasher::hash (builtin_hash_struct *bh)
17160 {
17161 unsigned ret = 0;
17162 int i;
17164 for (i = 0; i < 4; i++)
17165 {
17166 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17167 ret = (ret * 2) + bh->uns_p[i];
17168 }
17170 return ret;
17171 }
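/* Illustrative worked example (editorial): the loop above folds the four
   (mode, uns_p) pairs left to right, so for a unary V4SI built-in the
   hash accumulates as
     ret = 0
     ret = (ret * MAX_MACHINE_MODE + V4SImode) * 2 + uns_p[0]
     ret = (ret * MAX_MACHINE_MODE + V4SImode) * 2 + uns_p[1]
     ...
   with VOIDmode filling the unused argument slots.  */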
17173 /* Compare builtin hash entries H1 and H2 for equivalence. */
17174 bool
17175 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17176 {
17177 return ((p1->mode[0] == p2->mode[0])
17178 && (p1->mode[1] == p2->mode[1])
17179 && (p1->mode[2] == p2->mode[2])
17180 && (p1->mode[3] == p2->mode[3])
17181 && (p1->uns_p[0] == p2->uns_p[0])
17182 && (p1->uns_p[1] == p2->uns_p[1])
17183 && (p1->uns_p[2] == p2->uns_p[2])
17184 && (p1->uns_p[3] == p2->uns_p[3]));
17185 }
17187 /* Map types for builtin functions with an explicit return type and up to 3
17188 arguments. Functions with fewer than 3 arguments use VOIDmode as the type
17189 of the argument. */
17190 static tree
17191 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17192 machine_mode mode_arg1, machine_mode mode_arg2,
17193 enum rs6000_builtins builtin, const char *name)
17194 {
17195 struct builtin_hash_struct h;
17196 struct builtin_hash_struct *h2;
17197 int num_args = 3;
17198 int i;
17199 tree ret_type = NULL_TREE;
17200 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17202 /* Create builtin_hash_table. */
17203 if (builtin_hash_table == NULL)
17204 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
17206 h.type = NULL_TREE;
17207 h.mode[0] = mode_ret;
17208 h.mode[1] = mode_arg0;
17209 h.mode[2] = mode_arg1;
17210 h.mode[3] = mode_arg2;
17211 h.uns_p[0] = 0;
17212 h.uns_p[1] = 0;
17213 h.uns_p[2] = 0;
17214 h.uns_p[3] = 0;
17216 /* If the builtin is a type that produces unsigned results or takes unsigned
17217 arguments, and it is returned as a decl for the vectorizer (such as
17218 widening multiplies, permute), make sure the arguments and return value
17219 are type correct. */
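/* Illustrative example (editorial): ALTIVEC_BUILTIN_VMULEUB below sets
   h.uns_p[0], h.uns_p[1] and h.uns_p[2], so the V8HI result and the two
   V16QI arguments are looked up as their unsigned variants, giving
     vector unsigned short
     __builtin_altivec_vmuleub (vector unsigned char,
                                vector unsigned char);  */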
17220 switch (builtin)
17221 {
17222 /* unsigned 1 argument functions. */
17223 case CRYPTO_BUILTIN_VSBOX:
17224 case CRYPTO_BUILTIN_VSBOX_BE:
17225 case P8V_BUILTIN_VGBBD:
17226 case MISC_BUILTIN_CDTBCD:
17227 case MISC_BUILTIN_CBCDTD:
17228 h.uns_p[0] = 1;
17229 h.uns_p[1] = 1;
17230 break;
17232 /* unsigned 2 argument functions. */
17233 case ALTIVEC_BUILTIN_VMULEUB:
17234 case ALTIVEC_BUILTIN_VMULEUH:
17235 case P8V_BUILTIN_VMULEUW:
17236 case ALTIVEC_BUILTIN_VMULOUB:
17237 case ALTIVEC_BUILTIN_VMULOUH:
17238 case P8V_BUILTIN_VMULOUW:
17239 case CRYPTO_BUILTIN_VCIPHER:
17240 case CRYPTO_BUILTIN_VCIPHER_BE:
17241 case CRYPTO_BUILTIN_VCIPHERLAST:
17242 case CRYPTO_BUILTIN_VCIPHERLAST_BE:
17243 case CRYPTO_BUILTIN_VNCIPHER:
17244 case CRYPTO_BUILTIN_VNCIPHER_BE:
17245 case CRYPTO_BUILTIN_VNCIPHERLAST:
17246 case CRYPTO_BUILTIN_VNCIPHERLAST_BE:
17247 case CRYPTO_BUILTIN_VPMSUMB:
17248 case CRYPTO_BUILTIN_VPMSUMH:
17249 case CRYPTO_BUILTIN_VPMSUMW:
17250 case CRYPTO_BUILTIN_VPMSUMD:
17251 case CRYPTO_BUILTIN_VPMSUM:
17252 case MISC_BUILTIN_ADDG6S:
17253 case MISC_BUILTIN_DIVWEU:
17254 case MISC_BUILTIN_DIVDEU:
17255 case VSX_BUILTIN_UDIV_V2DI:
17256 case ALTIVEC_BUILTIN_VMAXUB:
17257 case ALTIVEC_BUILTIN_VMINUB:
17258 case ALTIVEC_BUILTIN_VMAXUH:
17259 case ALTIVEC_BUILTIN_VMINUH:
17260 case ALTIVEC_BUILTIN_VMAXUW:
17261 case ALTIVEC_BUILTIN_VMINUW:
17262 case P8V_BUILTIN_VMAXUD:
17263 case P8V_BUILTIN_VMINUD:
17264 h.uns_p[0] = 1;
17265 h.uns_p[1] = 1;
17266 h.uns_p[2] = 1;
17267 break;
17269 /* unsigned 3 argument functions. */
17270 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17271 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17272 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17273 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17274 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17275 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17276 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17277 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17278 case VSX_BUILTIN_VPERM_16QI_UNS:
17279 case VSX_BUILTIN_VPERM_8HI_UNS:
17280 case VSX_BUILTIN_VPERM_4SI_UNS:
17281 case VSX_BUILTIN_VPERM_2DI_UNS:
17282 case VSX_BUILTIN_XXSEL_16QI_UNS:
17283 case VSX_BUILTIN_XXSEL_8HI_UNS:
17284 case VSX_BUILTIN_XXSEL_4SI_UNS:
17285 case VSX_BUILTIN_XXSEL_2DI_UNS:
17286 case CRYPTO_BUILTIN_VPERMXOR:
17287 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17288 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17289 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17290 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17291 case CRYPTO_BUILTIN_VSHASIGMAW:
17292 case CRYPTO_BUILTIN_VSHASIGMAD:
17293 case CRYPTO_BUILTIN_VSHASIGMA:
17294 h.uns_p[0] = 1;
17295 h.uns_p[1] = 1;
17296 h.uns_p[2] = 1;
17297 h.uns_p[3] = 1;
17298 break;
17300 /* signed permute functions with unsigned char mask. */
17301 case ALTIVEC_BUILTIN_VPERM_16QI:
17302 case ALTIVEC_BUILTIN_VPERM_8HI:
17303 case ALTIVEC_BUILTIN_VPERM_4SI:
17304 case ALTIVEC_BUILTIN_VPERM_4SF:
17305 case ALTIVEC_BUILTIN_VPERM_2DI:
17306 case ALTIVEC_BUILTIN_VPERM_2DF:
17307 case VSX_BUILTIN_VPERM_16QI:
17308 case VSX_BUILTIN_VPERM_8HI:
17309 case VSX_BUILTIN_VPERM_4SI:
17310 case VSX_BUILTIN_VPERM_4SF:
17311 case VSX_BUILTIN_VPERM_2DI:
17312 case VSX_BUILTIN_VPERM_2DF:
17313 h.uns_p[3] = 1;
17314 break;
17316 /* unsigned args, signed return. */
17317 case VSX_BUILTIN_XVCVUXDSP:
17318 case VSX_BUILTIN_XVCVUXDDP_UNS:
17319 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17320 h.uns_p[1] = 1;
17321 break;
17323 /* signed args, unsigned return. */
17324 case VSX_BUILTIN_XVCVDPUXDS_UNS:
17325 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
17326 case MISC_BUILTIN_UNPACK_TD:
17327 case MISC_BUILTIN_UNPACK_V1TI:
17328 h.uns_p[0] = 1;
17329 break;
17331 /* unsigned arguments, bool return (compares). */
17332 case ALTIVEC_BUILTIN_VCMPEQUB:
17333 case ALTIVEC_BUILTIN_VCMPEQUH:
17334 case ALTIVEC_BUILTIN_VCMPEQUW:
17335 case P8V_BUILTIN_VCMPEQUD:
17336 case VSX_BUILTIN_CMPGE_U16QI:
17337 case VSX_BUILTIN_CMPGE_U8HI:
17338 case VSX_BUILTIN_CMPGE_U4SI:
17339 case VSX_BUILTIN_CMPGE_U2DI:
17340 case ALTIVEC_BUILTIN_VCMPGTUB:
17341 case ALTIVEC_BUILTIN_VCMPGTUH:
17342 case ALTIVEC_BUILTIN_VCMPGTUW:
17343 case P8V_BUILTIN_VCMPGTUD:
17344 h.uns_p[1] = 1;
17345 h.uns_p[2] = 1;
17346 break;
17348 /* unsigned arguments for 128-bit pack instructions. */
17349 case MISC_BUILTIN_PACK_TD:
17350 case MISC_BUILTIN_PACK_V1TI:
17351 h.uns_p[0] = 1;
17352 h.uns_p[1] = 1;
17353 h.uns_p[2] = 1;
17354 break;
17355 /* unsigned second arguments (vector shift right). */
17356 case ALTIVEC_BUILTIN_VSRB:
17357 case ALTIVEC_BUILTIN_VSRH:
17358 case ALTIVEC_BUILTIN_VSRW:
17359 case P8V_BUILTIN_VSRD:
17360 h.uns_p[2] = 1;
17361 break;
17363 default:
17364 break;
17365 }
17367 /* Figure out how many args are present. */
17368 while (num_args > 0 && h.mode[num_args] == VOIDmode)
17369 num_args--;
17371 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
17372 if (!ret_type && h.uns_p[0])
17373 ret_type = builtin_mode_to_type[h.mode[0]][0];
17375 if (!ret_type)
17376 fatal_error (input_location,
17377 "internal error: builtin function %qs had an unexpected "
17378 "return type %qs", name, GET_MODE_NAME (h.mode[0]));
17380 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
17381 arg_type[i] = NULL_TREE;
17383 for (i = 0; i < num_args; i++)
17385 int m = (int) h.mode[i+1];
17386 int uns_p = h.uns_p[i+1];
17388 arg_type[i] = builtin_mode_to_type[m][uns_p];
17389 if (!arg_type[i] && uns_p)
17390 arg_type[i] = builtin_mode_to_type[m][0];
17392 if (!arg_type[i])
17393 fatal_error (input_location,
17394 "internal error: builtin function %qs, argument %d "
17395 "had unexpected argument type %qs", name, i,
17396 GET_MODE_NAME (m));
17397 }
17399 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
17400 if (*found == NULL)
17402 h2 = ggc_alloc<builtin_hash_struct> ();
17403 *h2 = h;
17404 *found = h2;
17406 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
17407 arg_type[2], NULL_TREE);
17408 }
17410 return (*found)->type;
17411 }
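/* Editorial note: because the table memoizes on (modes, signedness),
   built-ins with identical shapes, e.g. vmuleub and vmuloub, end up
   sharing a single function type node.  */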
17413 static void
17414 rs6000_common_init_builtins (void)
17415 {
17416 const struct builtin_description *d;
17417 size_t i;
17419 tree opaque_ftype_opaque = NULL_TREE;
17420 tree opaque_ftype_opaque_opaque = NULL_TREE;
17421 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
17422 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17424 /* Create Altivec and VSX builtins on machines with at least the
17425 general purpose extensions (970 and newer) to allow the use of
17426 the target attribute. */
17428 if (TARGET_EXTRA_BUILTINS)
17429 builtin_mask |= RS6000_BTM_COMMON;
17431 /* Add the ternary operators. */
17432 d = bdesc_3arg;
17433 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17434 {
17435 tree type;
17436 HOST_WIDE_INT mask = d->mask;
17438 if ((mask & builtin_mask) != mask)
17439 {
17440 if (TARGET_DEBUG_BUILTIN)
17441 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
17442 continue;
17443 }
17445 if (rs6000_overloaded_builtin_p (d->code))
17446 {
17447 if (! (type = opaque_ftype_opaque_opaque_opaque))
17448 type = opaque_ftype_opaque_opaque_opaque
17449 = build_function_type_list (opaque_V4SI_type_node,
17450 opaque_V4SI_type_node,
17451 opaque_V4SI_type_node,
17452 opaque_V4SI_type_node,
17453 NULL_TREE);
17454 }
17455 else
17456 {
17457 enum insn_code icode = d->icode;
17458 if (d->name == 0)
17459 {
17460 if (TARGET_DEBUG_BUILTIN)
17461 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
17462 (long unsigned) i);
17464 continue;
17465 }
17467 if (icode == CODE_FOR_nothing)
17468 {
17469 if (TARGET_DEBUG_BUILTIN)
17470 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
17471 d->name);
17472 continue;
17473 }
17476 type = builtin_function_type (insn_data[icode].operand[0].mode,
17477 insn_data[icode].operand[1].mode,
17478 insn_data[icode].operand[2].mode,
17479 insn_data[icode].operand[3].mode,
17480 d->code, d->name);
17481 }
17483 def_builtin (d->name, type, d->code);
17484 }
17486 /* Add the binary operators. */
17487 d = bdesc_2arg;
17488 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17489 {
17490 machine_mode mode0, mode1, mode2;
17491 tree type;
17492 HOST_WIDE_INT mask = d->mask;
17494 if ((mask & builtin_mask) != mask)
17495 {
17496 if (TARGET_DEBUG_BUILTIN)
17497 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
17498 continue;
17499 }
17501 if (rs6000_overloaded_builtin_p (d->code))
17502 {
17503 if (! (type = opaque_ftype_opaque_opaque))
17504 type = opaque_ftype_opaque_opaque
17505 = build_function_type_list (opaque_V4SI_type_node,
17506 opaque_V4SI_type_node,
17507 opaque_V4SI_type_node,
17508 NULL_TREE);
17509 }
17510 else
17511 {
17512 enum insn_code icode = d->icode;
17513 if (d->name == 0)
17514 {
17515 if (TARGET_DEBUG_BUILTIN)
17516 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
17517 (long unsigned) i);
17519 continue;
17520 }
17522 if (icode == CODE_FOR_nothing)
17523 {
17524 if (TARGET_DEBUG_BUILTIN)
17525 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
17526 d->name);
17528 continue;
17529 }
17531 mode0 = insn_data[icode].operand[0].mode;
17532 mode1 = insn_data[icode].operand[1].mode;
17533 mode2 = insn_data[icode].operand[2].mode;
17535 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
17536 d->code, d->name);
17537 }
17539 def_builtin (d->name, type, d->code);
17540 }
17542 /* Add the simple unary operators. */
17543 d = bdesc_1arg;
17544 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17545 {
17546 machine_mode mode0, mode1;
17547 tree type;
17548 HOST_WIDE_INT mask = d->mask;
17550 if ((mask & builtin_mask) != mask)
17551 {
17552 if (TARGET_DEBUG_BUILTIN)
17553 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
17554 continue;
17555 }
17557 if (rs6000_overloaded_builtin_p (d->code))
17558 {
17559 if (! (type = opaque_ftype_opaque))
17560 type = opaque_ftype_opaque
17561 = build_function_type_list (opaque_V4SI_type_node,
17562 opaque_V4SI_type_node,
17563 NULL_TREE);
17564 }
17565 else
17566 {
17567 enum insn_code icode = d->icode;
17568 if (d->name == 0)
17569 {
17570 if (TARGET_DEBUG_BUILTIN)
17571 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
17572 (long unsigned) i);
17574 continue;
17575 }
17577 if (icode == CODE_FOR_nothing)
17578 {
17579 if (TARGET_DEBUG_BUILTIN)
17580 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
17581 d->name);
17583 continue;
17584 }
17586 mode0 = insn_data[icode].operand[0].mode;
17587 mode1 = insn_data[icode].operand[1].mode;
17589 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
17590 d->code, d->name);
17591 }
17593 def_builtin (d->name, type, d->code);
17594 }
17596 /* Add the simple no-argument operators. */
17597 d = bdesc_0arg;
17598 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17599 {
17600 machine_mode mode0;
17601 tree type;
17602 HOST_WIDE_INT mask = d->mask;
17604 if ((mask & builtin_mask) != mask)
17605 {
17606 if (TARGET_DEBUG_BUILTIN)
17607 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
17608 continue;
17609 }
17610 if (rs6000_overloaded_builtin_p (d->code))
17611 {
17612 if (!opaque_ftype_opaque)
17613 opaque_ftype_opaque
17614 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
17615 type = opaque_ftype_opaque;
17616 }
17617 else
17618 {
17619 enum insn_code icode = d->icode;
17620 if (d->name == 0)
17621 {
17622 if (TARGET_DEBUG_BUILTIN)
17623 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
17624 (long unsigned) i);
17625 continue;
17626 }
17627 if (icode == CODE_FOR_nothing)
17628 {
17629 if (TARGET_DEBUG_BUILTIN)
17630 fprintf (stderr,
17631 "rs6000_builtin, skip no-argument %s (no code)\n",
17632 d->name);
17633 continue;
17634 }
17635 mode0 = insn_data[icode].operand[0].mode;
17636 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
17637 d->code, d->name);
17638 }
17639 def_builtin (d->name, type, d->code);
17640 }
17641 }
17643 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
17644 static void
17645 init_float128_ibm (machine_mode mode)
17646 {
17647 if (!TARGET_XL_COMPAT)
17648 {
17649 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
17650 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
17651 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
17652 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
17654 if (!TARGET_HARD_FLOAT)
17655 {
17656 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
17657 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
17658 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
17659 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
17660 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
17661 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
17662 set_optab_libfunc (le_optab, mode, "__gcc_qle");
17663 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
17665 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
17666 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
17667 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
17668 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
17669 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
17670 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
17671 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
17672 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
17677 set_optab_libfunc (add_optab, mode, "_xlqadd");
17678 set_optab_libfunc (sub_optab, mode, "_xlqsub");
17679 set_optab_libfunc (smul_optab, mode, "_xlqmul");
17680 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
17683 /* Add various conversions for IFmode to use the traditional TFmode
17685 if (mode == IFmode)
17687 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
17688 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
17689 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
17690 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
17691 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
17692 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
17694 if (TARGET_POWERPC64)
17695 {
17696 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
17697 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
17698 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
17699 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
17704 /* Create a decl for either complex long double multiply or complex long double
17705 divide when long double is IEEE 128-bit floating point. We can't use
17706 __multc3 and __divtc3 because the original long double using IBM extended
17707 double used those names. The complex multiply/divide functions are encoded
17708 as builtin functions with a complex result and 4 scalar inputs. */
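/* Illustrative note (added for exposition; not part of the original
   source): under -mabi=ieeelongdouble the functions created below have
   the C prototype

     _Complex long double __mulkc3 (long double a, long double b,
                                    long double c, long double d);

   computing (a + b*i) * (c + d*i), with __divkc3 the analogous quotient;
   this follows from the fntype built from four long double arguments and
   a complex long double result. */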
17710 static void
17711 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
17712 {
17713 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
17714 NULL, NULL_TREE);
17716 set_builtin_decl (fncode, fndecl, true);
17718 if (TARGET_DEBUG_BUILTIN)
17719 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
17720 }
17724 /* Set up IEEE 128-bit floating point routines. Use different names if the
17725 arguments can be passed in a vector register. The historical PowerPC
17726 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
17727 continue to use that if we aren't using vector registers to pass IEEE
17728 128-bit floating point. */
17730 static void
17731 init_float128_ieee (machine_mode mode)
17732 {
17733 if (FLOAT128_VECTOR_P (mode))
17734 {
17735 static bool complex_muldiv_init_p = false;
17737 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
17738 we have clone or target attributes, this will be called a second
17739 time. We want to create the built-in function only once. */
17740 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
17741 {
17742 complex_muldiv_init_p = true;
17743 built_in_function fncode_mul =
17744 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
17745 - MIN_MODE_COMPLEX_FLOAT);
17746 built_in_function fncode_div =
17747 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
17748 - MIN_MODE_COMPLEX_FLOAT);
17750 tree fntype = build_function_type_list (complex_long_double_type_node,
17751 long_double_type_node,
17752 long_double_type_node,
17753 long_double_type_node,
17754 long_double_type_node,
17755 NULL_TREE);
17757 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
17758 create_complex_muldiv ("__divkc3", fncode_div, fntype);
17759 }
17761 set_optab_libfunc (add_optab, mode, "__addkf3");
17762 set_optab_libfunc (sub_optab, mode, "__subkf3");
17763 set_optab_libfunc (neg_optab, mode, "__negkf2");
17764 set_optab_libfunc (smul_optab, mode, "__mulkf3");
17765 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
17766 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
17767 set_optab_libfunc (abs_optab, mode, "__abskf2");
17768 set_optab_libfunc (powi_optab, mode, "__powikf2");
17770 set_optab_libfunc (eq_optab, mode, "__eqkf2");
17771 set_optab_libfunc (ne_optab, mode, "__nekf2");
17772 set_optab_libfunc (gt_optab, mode, "__gtkf2");
17773 set_optab_libfunc (ge_optab, mode, "__gekf2");
17774 set_optab_libfunc (lt_optab, mode, "__ltkf2");
17775 set_optab_libfunc (le_optab, mode, "__lekf2");
17776 set_optab_libfunc (unord_optab, mode, "__unordkf2");
17778 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
17779 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
17780 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
17781 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
17783 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
17784 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17785 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
17787 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
17788 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17789 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
17791 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
17792 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
17793 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
17794 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
17795 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
17796 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
17798 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
17799 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
17800 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
17801 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
17803 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
17804 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
17805 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
17806 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
17808 if (TARGET_POWERPC64)
17809 {
17810 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
17811 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
17812 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
17813 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
17819 set_optab_libfunc (add_optab, mode, "_q_add");
17820 set_optab_libfunc (sub_optab, mode, "_q_sub");
17821 set_optab_libfunc (neg_optab, mode, "_q_neg");
17822 set_optab_libfunc (smul_optab, mode, "_q_mul");
17823 set_optab_libfunc (sdiv_optab, mode, "_q_div");
17824 if (TARGET_PPC_GPOPT)
17825 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
17827 set_optab_libfunc (eq_optab, mode, "_q_feq");
17828 set_optab_libfunc (ne_optab, mode, "_q_fne");
17829 set_optab_libfunc (gt_optab, mode, "_q_fgt");
17830 set_optab_libfunc (ge_optab, mode, "_q_fge");
17831 set_optab_libfunc (lt_optab, mode, "_q_flt");
17832 set_optab_libfunc (le_optab, mode, "_q_fle");
17834 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
17835 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
17836 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
17837 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
17838 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
17839 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
17840 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
17841 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
17842 }
17843 }
17845 static void
17846 rs6000_init_libfuncs (void)
17847 {
17848 /* __float128 support. */
17849 if (TARGET_FLOAT128_TYPE)
17850 {
17851 init_float128_ibm (IFmode);
17852 init_float128_ieee (KFmode);
17853 }
17855 /* AIX/Darwin/64-bit Linux quad floating point routines. */
17856 if (TARGET_LONG_DOUBLE_128)
17857 {
17858 if (!TARGET_IEEEQUAD)
17859 init_float128_ibm (TFmode);
17861 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
17862 else
17863 init_float128_ieee (TFmode);
17864 }
17865 }
17867 /* Emit a potentially record-form instruction, setting DST from SRC.
17868 If DOT is 0, that is all; otherwise, set CCREG to the result of the
17869 signed comparison of DST with zero. If DOT is 1, the generated RTL
17870 doesn't care about the DST result; if DOT is 2, it does. If CCREG
17871 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
17872 a separate COMPARE. */
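/* Illustrative example (added for exposition; not part of the original
   source): with DOT == 1 and CCREG being CR0, the code below emits one
   record-form PARALLEL roughly like

     (parallel [(set (reg:CC CR0) (compare:CC (reg:DI SRC) (const_int 0)))
                (clobber (reg:DI DST))])

   while DOT == 2 replaces the clobber with (set (reg:DI DST) (reg:DI SRC));
   the DImode here is just for the example. */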
17874 void
17875 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
17876 {
17877 if (dot == 0)
17878 {
17879 emit_move_insn (dst, src);
17880 return;
17881 }
17883 if (cc_reg_not_cr0_operand (ccreg, CCmode))
17884 {
17885 emit_move_insn (dst, src);
17886 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
17887 return;
17888 }
17890 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
17891 if (dot == 1)
17892 {
17893 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
17894 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
17895 }
17896 else
17897 {
17898 rtx set = gen_rtx_SET (dst, src);
17899 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
17900 }
17901 }
17904 /* A validation routine: say whether CODE, a condition code, and MODE
17905 match. The other alternatives either don't make sense or should
17906 never be generated. */
17908 void
17909 validate_condition_mode (enum rtx_code code, machine_mode mode)
17910 {
17911 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
17912 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
17913 && GET_MODE_CLASS (mode) == MODE_CC);
17915 /* These don't make sense. */
17916 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
17917 || mode != CCUNSmode);
17919 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
17920 || mode == CCUNSmode);
17922 gcc_assert (mode == CCFPmode
17923 || (code != ORDERED && code != UNORDERED
17924 && code != UNEQ && code != LTGT
17925 && code != UNGT && code != UNLT
17926 && code != UNGE && code != UNLE));
17928 /* These should never be generated except for
17929 flag_finite_math_only. */
17930 gcc_assert (mode != CCFPmode
17931 || flag_finite_math_only
17932 || (code != LE && code != GE
17933 && code != UNEQ && code != LTGT
17934 && code != UNGT && code != UNLT));
17936 /* These are invalid; the information is not there. */
17937 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
17938 }
17941 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
17942 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
17943 not zero, store there the bit offset (counted from the right) where
17944 the single stretch of 1 bits begins; and similarly for B, the bit
17945 offset where it ends. */
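/* Worked example (added for exposition; not part of the original source):
   for the SImode mask 0x000ff000 the single run of 1 bits occupies bits
   12..19, so *E gets 12, *B gets 19, and the function returns true. A
   wrap-around mask such as 0xff0000ff is also accepted (*E = 24, *B = 7,
   the run wrapping through bit 0), but 0x00ff00ff, with two separate
   runs, is rejected. */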
17947 bool
17948 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
17949 {
17950 unsigned HOST_WIDE_INT val = INTVAL (mask);
17951 unsigned HOST_WIDE_INT bit;
17952 int nb, ne;
17953 int n = GET_MODE_PRECISION (mode);
17955 if (mode != DImode && mode != SImode)
17956 return false;
17958 if (INTVAL (mask) >= 0)
17959 {
17960 bit = val & -val;
17961 ne = exact_log2 (bit);
17962 nb = exact_log2 (val + bit);
17963 }
17964 else if (val + 1 == 0)
17965 {
17966 nb = n;
17967 ne = 0;
17968 }
17969 else if (val & 1)
17970 {
17971 val = ~val;
17972 bit = val & -val;
17973 nb = exact_log2 (bit);
17974 ne = exact_log2 (val + bit);
17975 }
17976 else
17977 {
17978 bit = val & -val;
17979 ne = exact_log2 (bit);
17980 if (val + bit == 0)
17981 nb = n;
17982 else
17983 nb = 0;
17984 }
17986 nb--;
17988 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
17989 return false;
17991 if (b)
17992 *b = nb;
17993 if (e)
17994 *e = ne;
17996 return true;
17997 }
17999 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18000 or rldicr instruction, to implement an AND with it in mode MODE. */
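/* Examples (added for exposition; not part of the original source): in
   DImode, 0x00000000ffffffff (E == 0) fits rldicl and 0xffffffff00000000
   (B == 63) fits rldicr, while 0x00000000000ff000 fits the rlwinm form
   because its run lies entirely within the low 32 bits. */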
18002 bool
18003 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
18004 {
18005 int nb, ne;
18007 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18008 return false;
18010 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
18011 does not wrap. */
18012 if (mode == DImode)
18013 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18015 /* For SImode, rlwinm can do everything. */
18016 if (mode == SImode)
18017 return (nb < 32 && ne < 32);
18019 return false;
18020 }
18022 /* Return the instruction template for an AND with mask in mode MODE, with
18023 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18025 const char *
18026 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
18027 {
18028 int nb, ne;
18030 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18031 gcc_unreachable ();
18033 if (mode == DImode && ne == 0)
18034 {
18035 operands[3] = GEN_INT (63 - nb);
18036 if (dot)
18037 return "rldicl. %0,%1,0,%3";
18038 return "rldicl %0,%1,0,%3";
18039 }
18041 if (mode == DImode && nb == 63)
18042 {
18043 operands[3] = GEN_INT (63 - ne);
18044 if (dot)
18045 return "rldicr. %0,%1,0,%3";
18046 return "rldicr %0,%1,0,%3";
18047 }
18049 if (nb < 32 && ne < 32)
18050 {
18051 operands[3] = GEN_INT (31 - nb);
18052 operands[4] = GEN_INT (31 - ne);
18053 if (dot)
18054 return "rlwinm. %0,%1,0,%3,%4";
18055 return "rlwinm %0,%1,0,%3,%4";
18056 }
18058 gcc_unreachable ();
18059 }
18061 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18062 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18063 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
18065 bool
18066 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18067 {
18068 int nb, ne;
18070 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18071 return false;
18073 int n = GET_MODE_PRECISION (mode);
18074 int sh = -1;
18076 if (CONST_INT_P (XEXP (shift, 1)))
18077 {
18078 sh = INTVAL (XEXP (shift, 1));
18079 if (sh < 0 || sh >= n)
18080 return false;
18081 }
18083 rtx_code code = GET_CODE (shift);
18085 /* Convert any shift by 0 to a rotate, to simplify below code. */
18086 if (sh == 0)
18087 code = ROTATE;
18089 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18090 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18091 code = ASHIFT;
18092 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18093 {
18094 code = LSHIFTRT;
18095 sh = n - sh;
18096 }
18098 /* DImode rotates need rld*. */
18099 if (mode == DImode && code == ROTATE)
18100 return (nb == 63 || ne == 0 || ne == sh);
18102 /* SImode rotates need rlw*. */
18103 if (mode == SImode && code == ROTATE)
18104 return (nb < 32 && ne < 32 && sh < 32);
18106 /* Wrap-around masks are only okay for rotates. */
18107 if (ne > nb)
18108 return false;
18110 /* Variable shifts are only okay for rotates. */
18111 if (sh < 0)
18112 return false;
18114 /* Don't allow ASHIFT if the mask is wrong for that. */
18115 if (code == ASHIFT && ne < sh)
18116 return false;
18118 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18119 if the mask is wrong for that. */
18120 if (nb < 32 && ne < 32 && sh < 32
18121 && !(code == LSHIFTRT && nb >= 32 - sh))
18122 return true;
18124 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18125 if the mask is wrong for that. */
18126 if (code == LSHIFTRT)
18127 sh = 64 - sh;
18128 if (nb == 63 || ne == 0 || ne == sh)
18129 return !(code == LSHIFTRT && nb >= sh);
18131 return false;
18132 }
18134 /* Return the instruction template for a shift with mask in mode MODE, with
18135 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18137 const char *
18138 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18139 {
18140 int nb, ne;
18142 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18143 gcc_unreachable ();
18145 if (mode == DImode && ne == 0)
18146 {
18147 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18148 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
18149 operands[3] = GEN_INT (63 - nb);
18150 if (dot)
18151 return "rld%I2cl. %0,%1,%2,%3";
18152 return "rld%I2cl %0,%1,%2,%3";
18153 }
18155 if (mode == DImode && nb == 63)
18156 {
18157 operands[3] = GEN_INT (63 - ne);
18158 if (dot)
18159 return "rld%I2cr. %0,%1,%2,%3";
18160 return "rld%I2cr %0,%1,%2,%3";
18161 }
18163 if (mode == DImode
18164 && GET_CODE (operands[4]) != LSHIFTRT
18165 && CONST_INT_P (operands[2])
18166 && ne == INTVAL (operands[2]))
18167 {
18168 operands[3] = GEN_INT (63 - nb);
18169 if (dot)
18170 return "rld%I2c. %0,%1,%2,%3";
18171 return "rld%I2c %0,%1,%2,%3";
18172 }
18174 if (nb < 32 && ne < 32)
18175 {
18176 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18177 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18178 operands[3] = GEN_INT (31 - nb);
18179 operands[4] = GEN_INT (31 - ne);
18180 /* This insn can also be a 64-bit rotate with mask that really makes
18181 it just a shift right (with mask); the %h below are to adjust for
18182 that situation (shift count is >= 32 in that case). */
18183 if (dot)
18184 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18185 return "rlw%I2nm %0,%1,%h2,%3,%4";
18186 }
18188 gcc_unreachable ();
18189 }
18191 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
18192 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
18193 ASHIFT, or LSHIFTRT) in mode MODE. */
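/* Illustrative case (added for exposition; not part of the original
   source): in SImode, an ASHIFT by 16 combined with the mask 0xffff0000
   (run from bit 16 to bit 31, so E == SH) is exactly what rlwimi can
   insert; rs6000_insn_for_insert_mask below produces the template. */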
18195 bool
18196 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
18197 {
18198 int nb, ne;
18200 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18201 return false;
18203 int n = GET_MODE_PRECISION (mode);
18205 int sh = INTVAL (XEXP (shift, 1));
18206 if (sh < 0 || sh >= n)
18207 return false;
18209 rtx_code code = GET_CODE (shift);
18211 /* Convert any shift by 0 to a rotate, to simplify below code. */
18212 if (sh == 0)
18213 code = ROTATE;
18215 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18216 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18217 code = ASHIFT;
18218 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18219 {
18220 code = LSHIFTRT;
18221 sh = n - sh;
18222 }
18224 /* DImode rotates need rldimi. */
18225 if (mode == DImode && code == ROTATE)
18226 return (ne == sh);
18228 /* SImode rotates need rlwimi. */
18229 if (mode == SImode && code == ROTATE)
18230 return (nb < 32 && ne < 32 && sh < 32);
18232 /* Wrap-around masks are only okay for rotates. */
18233 if (ne > nb)
18234 return false;
18236 /* Don't allow ASHIFT if the mask is wrong for that. */
18237 if (code == ASHIFT && ne < sh)
18238 return false;
18240 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
18241 if the mask is wrong for that. */
18242 if (nb < 32 && ne < 32 && sh < 32
18243 && !(code == LSHIFTRT && nb >= 32 - sh))
18244 return (ne == sh);
18246 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
18247 if the mask is wrong for that. */
18248 if (code == LSHIFTRT)
18249 sh = 64 - sh;
18250 if (ne == sh)
18251 return !(code == LSHIFTRT && nb >= sh);
18253 return false;
18254 }
18256 /* Return the instruction template for an insert with mask in mode MODE, with
18257 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18259 const char *
18260 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
18261 {
18262 int nb, ne;
18264 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18265 gcc_unreachable ();
18267 /* Prefer rldimi because rlwimi is cracked. */
18268 if (TARGET_POWERPC64
18269 && (!dot || mode == DImode)
18270 && GET_CODE (operands[4]) != LSHIFTRT
18271 && ne == INTVAL (operands[2]))
18272 {
18273 operands[3] = GEN_INT (63 - nb);
18274 if (dot)
18275 return "rldimi. %0,%1,%2,%3";
18276 return "rldimi %0,%1,%2,%3";
18277 }
18279 if (nb < 32 && ne < 32)
18280 {
18281 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18282 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18283 operands[3] = GEN_INT (31 - nb);
18284 operands[4] = GEN_INT (31 - ne);
18285 if (dot)
18286 return "rlwimi. %0,%1,%2,%3,%4";
18287 return "rlwimi %0,%1,%2,%3,%4";
18288 }
18290 gcc_unreachable ();
18291 }
18293 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
18294 using two machine instructions. */
18296 bool
18297 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
18298 {
18299 /* There are two kinds of AND we can handle with two insns:
18300 1) those we can do with two rl* insn;
18301 2) those that need one rl* insn plus one other insn.
18303 We do not handle that last case yet. */
18305 /* If there is just one stretch of ones, we can do it. */
18306 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
18307 return true;
18309 /* Otherwise, fill in the lowest "hole"; if we can do the result with
18310 one insn, we can do the whole thing with two. */
18311 unsigned HOST_WIDE_INT val = INTVAL (c);
18312 unsigned HOST_WIDE_INT bit1 = val & -val;
18313 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18314 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18315 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18316 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
18317 }
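/* Worked example (added for exposition; not part of the original source):
   for val = 0x00000000ff00ff00, bit1 = 0x100 (lowest 1 bit), bit2 =
   0x10000 (lowest 0 bit above it), bit3 = 0x1000000 (next 1 bit again),
   so val + bit3 - bit2 = 0x00000000ffffff00: the low "hole" at bits
   16..23 has been filled in, leaving one contiguous run for
   rs6000_is_valid_and_mask to check. */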
18319 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
18320 If EXPAND is true, split rotate-and-mask instructions we generate to
18321 their constituent parts as well (this is used during expand); if DOT
18322 is 1, make the last insn a record-form instruction clobbering the
18323 destination GPR and setting the CC reg (from operands[3]); if 2, set
18324 that GPR as well as the CC reg. */
18326 void
18327 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
18328 {
18329 gcc_assert (!(expand && dot));
18331 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
18333 /* If it is one stretch of ones, it is DImode; shift left, mask, then
18334 shift right. This generates better code than doing the masks without
18335 shifts, or shifting first right and then left. */
18336 int nb, ne;
18337 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
18338 {
18339 gcc_assert (mode == DImode);
18341 int shift = 63 - nb;
18342 if (expand)
18343 {
18344 rtx tmp1 = gen_reg_rtx (DImode);
18345 rtx tmp2 = gen_reg_rtx (DImode);
18346 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
18347 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
18348 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
18349 }
18350 else
18351 {
18352 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
18353 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
18354 emit_move_insn (operands[0], tmp);
18355 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
18356 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18357 }
18358 return;
18359 }
18361 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
18362 that does the rest. */
18363 unsigned HOST_WIDE_INT bit1 = val & -val;
18364 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18365 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18366 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18368 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
18369 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
18371 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
18373 /* Two "no-rotate"-and-mask instructions, for SImode. */
18374 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
18375 {
18376 gcc_assert (mode == SImode);
18378 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18379 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
18380 emit_move_insn (reg, tmp);
18381 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18382 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18383 return;
18384 }
18386 gcc_assert (mode == DImode);
18388 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
18389 insns; we have to do the first in SImode, because it wraps. */
18390 if (mask2 <= 0xffffffff
18391 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
18392 {
18393 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18394 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
18395 GEN_INT (mask1));
18396 rtx reg_low = gen_lowpart (SImode, reg);
18397 emit_move_insn (reg_low, tmp);
18398 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18399 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18400 return;
18401 }
18403 /* Two rld* insns: rotate, clear the hole in the middle (which now is
18404 at the top end), rotate back and clear the other hole. */
18405 int right = exact_log2 (bit3);
18406 int left = 64 - right;
18408 /* Rotate the mask too. */
18409 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
18411 if (expand)
18412 {
18413 rtx tmp1 = gen_reg_rtx (DImode);
18414 rtx tmp2 = gen_reg_rtx (DImode);
18415 rtx tmp3 = gen_reg_rtx (DImode);
18416 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
18417 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
18418 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
18419 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
18420 }
18421 else
18422 {
18423 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
18424 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
18425 emit_move_insn (operands[0], tmp);
18426 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
18427 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
18428 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18429 }
18430 }
18432 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
18433 for lfq and stfq insns iff the registers are hard registers. */
18435 int
18436 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
18437 {
18438 /* We might have been passed a SUBREG. */
18439 if (!REG_P (reg1) || !REG_P (reg2))
18440 return 0;
18442 /* We might have been passed non floating point registers. */
18443 if (!FP_REGNO_P (REGNO (reg1))
18444 || !FP_REGNO_P (REGNO (reg2)))
18445 return 0;
18447 return (REGNO (reg1) == REGNO (reg2) - 1);
18448 }
18450 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
18451 addr1 and addr2 must be in consecutive memory locations
18452 (addr2 == addr1 + 8). */
18454 int
18455 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
18456 {
18457 rtx addr1, addr2;
18458 unsigned int reg1, reg2;
18459 int offset1, offset2;
18461 /* The mems cannot be volatile. */
18462 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
18463 return 0;
18465 addr1 = XEXP (mem1, 0);
18466 addr2 = XEXP (mem2, 0);
18468 /* Extract an offset (if used) from the first addr. */
18469 if (GET_CODE (addr1) == PLUS)
18470 {
18471 /* If not a REG, return zero. */
18472 if (!REG_P (XEXP (addr1, 0)))
18473 return 0;
18474 else
18475 {
18476 reg1 = REGNO (XEXP (addr1, 0));
18477 /* The offset must be constant! */
18478 if (!CONST_INT_P (XEXP (addr1, 1)))
18479 return 0;
18480 offset1 = INTVAL (XEXP (addr1, 1));
18481 }
18482 }
18483 else if (!REG_P (addr1))
18484 return 0;
18485 else
18486 {
18487 reg1 = REGNO (addr1);
18488 /* This was a simple (mem (reg)) expression. Offset is 0. */
18489 offset1 = 0;
18490 }
18492 /* And now for the second addr. */
18493 if (GET_CODE (addr2) == PLUS)
18494 {
18495 /* If not a REG, return zero. */
18496 if (!REG_P (XEXP (addr2, 0)))
18497 return 0;
18498 else
18499 {
18500 reg2 = REGNO (XEXP (addr2, 0));
18501 /* The offset must be constant. */
18502 if (!CONST_INT_P (XEXP (addr2, 1)))
18503 return 0;
18504 offset2 = INTVAL (XEXP (addr2, 1));
18505 }
18506 }
18507 else if (!REG_P (addr2))
18508 return 0;
18509 else
18510 {
18511 reg2 = REGNO (addr2);
18512 /* This was a simple (mem (reg)) expression. Offset is 0. */
18513 offset2 = 0;
18514 }
18516 /* Both of these must have the same base register. */
18517 if (reg1 != reg2)
18518 return 0;
18520 /* The offset for the second addr must be 8 more than the first addr. */
18521 if (offset2 != offset1 + 8)
18522 return 0;
18524 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
18525 insns. */
18526 return 1;
18527 }
18529 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
18530 need to use DDmode, in all other cases we can use the same mode. */
18531 static machine_mode
18532 rs6000_secondary_memory_needed_mode (machine_mode mode)
18533 {
18534 if (lra_in_progress && mode == SDmode)
18535 return DDmode;
18536 return mode;
18537 }
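/* Note (added for exposition; not part of the original source): SDmode is
   a 4-byte decimal float, but the FPRs operate on it in the wider DDmode
   format, so a spill slot created while LRA is in progress must be
   DDmode-sized for the value to survive a store/load round trip. */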
18539 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
18540 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
18541 only work on the traditional altivec registers, note if an altivec register
18542 was chosen. */
18544 static enum rs6000_reg_type
18545 register_to_reg_type (rtx reg, bool *is_altivec)
18546 {
18547 HOST_WIDE_INT regno;
18548 enum reg_class rclass;
18550 if (SUBREG_P (reg))
18551 reg = SUBREG_REG (reg);
18553 if (!REG_P (reg))
18554 return NO_REG_TYPE;
18556 regno = REGNO (reg);
18557 if (!HARD_REGISTER_NUM_P (regno))
18558 {
18559 if (!lra_in_progress && !reload_completed)
18560 return PSEUDO_REG_TYPE;
18562 regno = true_regnum (reg);
18563 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
18564 return PSEUDO_REG_TYPE;
18565 }
18567 gcc_assert (regno >= 0);
18569 if (is_altivec && ALTIVEC_REGNO_P (regno))
18570 *is_altivec = true;
18572 rclass = rs6000_regno_regclass[regno];
18573 return reg_class_to_reg_type[(int)rclass];
18574 }
18576 /* Helper function to return the cost of adding a TOC entry address. */
18578 static inline int
18579 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
18580 {
18581 int ret;
18583 if (TARGET_CMODEL != CMODEL_SMALL)
18584 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
18586 else
18587 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
18589 return ret;
18590 }
18592 /* Helper function for rs6000_secondary_reload to determine whether the memory
18593 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
18594 needs reloading. Return negative if the memory is not handled by the memory
18595 helper functions and to try a different reload method, 0 if no additional
18596 instructions are needed, and positive to give the extra cost for the
18597 memory. */
18599 static int
18600 rs6000_secondary_reload_memory (rtx addr,
18601 enum reg_class rclass,
18602 machine_mode mode)
18603 {
18604 int extra_cost = 0;
18605 rtx reg, and_arg, plus_arg0, plus_arg1;
18606 addr_mask_type addr_mask;
18607 const char *type = NULL;
18608 const char *fail_msg = NULL;
18610 if (GPR_REG_CLASS_P (rclass))
18611 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18613 else if (rclass == FLOAT_REGS)
18614 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18616 else if (rclass == ALTIVEC_REGS)
18617 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18619 /* For the combined VSX_REGS, turn off Altivec AND -16. */
18620 else if (rclass == VSX_REGS)
18621 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
18622 & ~RELOAD_REG_AND_M16);
18624 /* If the register allocator hasn't made up its mind yet on the register
18625 class to use, settle on defaults to use. */
18626 else if (rclass == NO_REGS)
18627 {
18628 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
18629 & ~RELOAD_REG_AND_M16);
18631 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
18632 addr_mask &= ~(RELOAD_REG_INDEXED
18633 | RELOAD_REG_PRE_INCDEC
18634 | RELOAD_REG_PRE_MODIFY);
18635 }
18637 else
18638 addr_mask = 0;
18640 /* If the register isn't valid in this register class, just return now. */
18641 if ((addr_mask & RELOAD_REG_VALID) == 0)
18642 {
18643 if (TARGET_DEBUG_ADDR)
18644 {
18645 fprintf (stderr,
18646 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18647 "not valid in class\n",
18648 GET_MODE_NAME (mode), reg_class_names[rclass]);
18649 debug_rtx (addr);
18650 }
18652 return -1;
18653 }
18655 switch (GET_CODE (addr))
18656 {
18657 /* Does the register class support auto update forms for this mode? We
18658 don't need a scratch register, since the powerpc only supports
18659 PRE_INC, PRE_DEC, and PRE_MODIFY. */
18660 case PRE_INC:
18661 case PRE_DEC:
18662 reg = XEXP (addr, 0);
18663 if (!base_reg_operand (addr, GET_MODE (reg)))
18664 {
18665 fail_msg = "no base register #1";
18666 extra_cost = -1;
18667 }
18669 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18670 {
18671 extra_cost = 1;
18672 type = "update";
18673 }
18674 break;
18676 case PRE_MODIFY:
18677 reg = XEXP (addr, 0);
18678 plus_arg1 = XEXP (addr, 1);
18679 if (!base_reg_operand (reg, GET_MODE (reg))
18680 || GET_CODE (plus_arg1) != PLUS
18681 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
18682 {
18683 fail_msg = "bad PRE_MODIFY";
18684 extra_cost = -1;
18685 }
18687 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18688 {
18689 extra_cost = 1;
18690 type = "update";
18691 }
18692 break;
18694 /* Do we need to simulate AND -16 to clear the bottom address bits used
18695 in VMX load/stores? Only allow the AND for vector sizes. */
18696 case AND:
18697 and_arg = XEXP (addr, 0);
18698 if (GET_MODE_SIZE (mode) != 16
18699 || !CONST_INT_P (XEXP (addr, 1))
18700 || INTVAL (XEXP (addr, 1)) != -16)
18701 {
18702 fail_msg = "bad Altivec AND #1";
18703 extra_cost = -1;
18704 }
18706 if (rclass != ALTIVEC_REGS)
18707 {
18708 if (legitimate_indirect_address_p (and_arg, false))
18709 extra_cost = 1;
18711 else if (legitimate_indexed_address_p (and_arg, false))
18712 extra_cost = 2;
18714 else
18715 {
18716 fail_msg = "bad Altivec AND #2";
18717 extra_cost = -1;
18718 }
18720 type = "and";
18721 }
18722 break;
18724 /* If this is an indirect address, make sure it is a base register. */
18725 case REG:
18726 case SUBREG:
18727 if (!legitimate_indirect_address_p (addr, false))
18728 {
18729 extra_cost = 1;
18730 type = "move";
18731 }
18732 break;
18734 /* If this is an indexed address, make sure the register class can handle
18735 indexed addresses for this mode. */
18736 case PLUS:
18737 plus_arg0 = XEXP (addr, 0);
18738 plus_arg1 = XEXP (addr, 1);
18740 /* (plus (plus (reg) (constant)) (constant)) is generated during
18741 push_reload processing, so handle it now. */
18742 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
18743 {
18744 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18745 {
18746 extra_cost = 1;
18747 type = "offset";
18748 }
18749 }
18751 /* (plus (plus (reg) (constant)) (reg)) is also generated during
18752 push_reload processing, so handle it now. */
18753 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
18754 {
18755 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18756 {
18757 extra_cost = 1;
18758 type = "indexed #2";
18759 }
18760 }
18762 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
18763 {
18764 fail_msg = "no base register #2";
18765 extra_cost = -1;
18766 }
18768 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
18769 {
18770 if ((addr_mask & RELOAD_REG_INDEXED) == 0
18771 || !legitimate_indexed_address_p (addr, false))
18772 {
18773 extra_cost = 1;
18774 type = "indexed";
18775 }
18776 }
18778 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
18779 && CONST_INT_P (plus_arg1))
18780 {
18781 if (!quad_address_offset_p (INTVAL (plus_arg1)))
18782 {
18783 extra_cost = 1;
18784 type = "vector d-form offset";
18785 }
18786 }
18788 /* Make sure the register class can handle offset addresses. */
18789 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
18790 {
18791 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18792 {
18793 extra_cost = 1;
18794 type = "offset #2";
18795 }
18796 }
18798 else
18799 {
18800 fail_msg = "bad PLUS";
18801 extra_cost = -1;
18802 }
18804 break;
18806 case LO_SUM:
18807 /* Quad offsets are restricted and can't handle normal addresses. */
18808 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18809 {
18810 extra_cost = -1;
18811 type = "vector d-form lo_sum";
18812 }
18814 else if (!legitimate_lo_sum_address_p (mode, addr, false))
18815 {
18816 fail_msg = "bad LO_SUM";
18817 extra_cost = -1;
18818 }
18820 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18821 {
18822 extra_cost = 1;
18823 type = "lo_sum";
18824 }
18825 break;
18827 /* Static addresses need to create a TOC entry. */
18828 case CONST:
18829 case SYMBOL_REF:
18830 case LABEL_REF:
18831 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18832 {
18833 extra_cost = -1;
18834 type = "vector d-form lo_sum #2";
18835 }
18837 else
18838 {
18839 type = "address";
18840 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
18841 }
18842 break;
18844 /* TOC references look like offsetable memory. */
18845 case UNSPEC:
18846 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
18847 {
18848 fail_msg = "bad UNSPEC";
18849 extra_cost = -1;
18850 }
18852 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18853 {
18854 extra_cost = -1;
18855 type = "vector d-form lo_sum #3";
18856 }
18858 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18859 {
18860 extra_cost = 1;
18861 type = "toc reference";
18862 }
18863 break;
18865 default:
18866 {
18867 fail_msg = "bad address";
18868 extra_cost = -1;
18869 }
18870 }
18872 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
18873 {
18874 if (extra_cost < 0)
18875 fprintf (stderr,
18876 "rs6000_secondary_reload_memory error: mode = %s, "
18877 "class = %s, addr_mask = '%s', %s\n",
18878 GET_MODE_NAME (mode),
18879 reg_class_names[rclass],
18880 rs6000_debug_addr_mask (addr_mask, false),
18881 (fail_msg != NULL) ? fail_msg : "<bad address>");
18883 else
18884 fprintf (stderr,
18885 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18886 "addr_mask = '%s', extra cost = %d, %s\n",
18887 GET_MODE_NAME (mode),
18888 reg_class_names[rclass],
18889 rs6000_debug_addr_mask (addr_mask, false),
18890 extra_cost,
18891 (type) ? type : "<none>");
18893 debug_rtx (addr);
18894 }
18896 return extra_cost;
18897 }
18899 /* Helper function for rs6000_secondary_reload to return true if a move to a
18900 different register class is really a simple move. */
18902 static bool
18903 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
18904 enum rs6000_reg_type from_type,
18905 machine_mode mode)
18906 {
18907 int size = GET_MODE_SIZE (mode);
18909 /* Add support for various direct moves available. In this function, we only
18910 look at cases where we don't need any extra registers, and one or more
18911 simple move insns are issued. Originally small integers are not allowed
18912 in FPR/VSX registers. Single precision binary floating is not a simple
18913 move because we need to convert to the single precision memory layout.
18914 The 4-byte SDmode can be moved. TDmode values are disallowed since they
18915 need special direct move handling, which we do not support yet. */
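/* For instance (added for exposition; not part of the original source):
   a DImode move between a GPR and a VSX register on a 64-bit ISA 2.07
   target is a single mtvsrd/mfvsrd and is reported as "simple" below,
   whereas SFmode needs an xscvdpspn/xscvspdpn conversion and is handled
   by rs6000_secondary_reload_direct_move instead. */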
18916 if (TARGET_DIRECT_MOVE
18917 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18918 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
18919 {
18920 if (TARGET_POWERPC64)
18921 {
18922 /* ISA 2.07: MTVSRD or MVFVSRD. */
18923 if (size == 8)
18924 return true;
18926 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
18927 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
18928 return true;
18929 }
18931 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
18932 if (TARGET_P8_VECTOR)
18933 {
18934 if (mode == SImode)
18935 return true;
18937 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
18938 return true;
18939 }
18941 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
18942 if (mode == SDmode)
18943 return true;
18944 }
18946 /* Move to/from SPR. */
18947 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
18948 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
18949 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
18950 return true;
18952 return false;
18953 }
18955 /* Direct move helper function for rs6000_secondary_reload, handle all of the
18956 special direct moves that involve allocating an extra register, return the
18957 insn code of the helper function if there is such a function or
18958 CODE_FOR_nothing if not. */
18960 static bool
18961 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
18962 enum rs6000_reg_type from_type,
18963 machine_mode mode,
18964 secondary_reload_info *sri,
18965 bool altivec_p)
18966 {
18967 bool ret = false;
18968 enum insn_code icode = CODE_FOR_nothing;
18969 int cost = 0;
18970 int size = GET_MODE_SIZE (mode);
18972 if (TARGET_POWERPC64 && size == 16)
18973 {
18974 /* Handle moving 128-bit values from GPRs to VSX point registers on
18975 ISA 2.07 (power8, power9) when running in 64-bit mode using
18976 XXPERMDI to glue the two 64-bit values back together. */
18977 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18978 {
18979 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
18980 icode = reg_addr[mode].reload_vsx_gpr;
18981 }
18983 /* Handle moving 128-bit values from VSX point registers to GPRs on
18984 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
18985 bottom 64-bit value. */
18986 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18987 {
18988 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
18989 icode = reg_addr[mode].reload_gpr_vsx;
18990 }
18991 }
18993 else if (TARGET_POWERPC64 && mode == SFmode)
18994 {
18995 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18996 {
18997 cost = 3; /* xscvdpspn, mfvsrd, and. */
18998 icode = reg_addr[mode].reload_gpr_vsx;
18999 }
19001 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19002 {
19003 cost = 2; /* mtvsrz, xscvspdpn. */
19004 icode = reg_addr[mode].reload_vsx_gpr;
19005 }
19006 }
19008 else if (!TARGET_POWERPC64 && size == 8)
19009 {
19010 /* Handle moving 64-bit values from GPRs to floating point registers on
19011 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19012 32-bit values back together. Altivec register classes must be handled
19013 specially since a different instruction is used, and the secondary
19014 reload support requires a single instruction class in the scratch
19015 register constraint. However, right now TFmode is not allowed in
19016 Altivec registers, so the pattern will never match. */
19017 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
19018 {
19019 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
19020 icode = reg_addr[mode].reload_fpr_gpr;
19021 }
19022 }
19024 if (icode != CODE_FOR_nothing)
19025 {
19026 ret = true;
19027 if (sri)
19028 {
19029 sri->icode = icode;
19030 sri->extra_cost = cost;
19031 }
19032 }
19034 return ret;
19035 }
19037 /* Return whether a move between two register classes can be done either
19038 directly (simple move) or via a pattern that uses a single extra temporary
19039 (using ISA 2.07's direct move in this case). */
19041 static bool
19042 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
19043 enum rs6000_reg_type from_type,
19044 machine_mode mode,
19045 secondary_reload_info *sri,
19046 bool altivec_p)
19047 {
19048 /* Fall back to load/store reloads if either type is not a register. */
19049 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
19050 return false;
19052 /* If we haven't allocated registers yet, assume the move can be done for the
19053 standard register types. */
19054 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
19055 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
19056 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
19057 return true;
19059 /* Moves to the same set of registers is a simple move for non-specialized
19060 registers. */
19061 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
19062 return true;
19064 /* Check whether a simple move can be done directly. */
19065 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
19066 {
19067 if (sri)
19068 {
19069 sri->icode = CODE_FOR_nothing;
19070 sri->extra_cost = 0;
19071 }
19072 return true;
19073 }
19075 /* Now check if we can do it in a few steps. */
19076 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
19077 altivec_p);
19078 }
19080 /* Inform reload about cases where moving X with a mode MODE to a register in
19081 RCLASS requires an extra scratch or immediate register. Return the class
19082 needed for the immediate register.
19084 For VSX and Altivec, we may need a register to convert sp+offset into
19085 reg+sp+offset.
19087 For misaligned 64-bit gpr loads and stores we need a register to
19088 convert an offset address to indirect. */
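/* Example scenario (added for exposition; not part of the original
   source): a V4SI load from (plus (reg sp) (const_int 65536)) cannot be
   encoded as a vector load, so this hook asks reload for a scratch GPR
   in which the out-of-range address is materialized first, turning the
   access into an indexed (reg+reg) form. */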
19090 static reg_class_t
19091 rs6000_secondary_reload (bool in_p,
19092 rtx x,
19093 reg_class_t rclass_i,
19094 machine_mode mode,
19095 secondary_reload_info *sri)
19096 {
19097 enum reg_class rclass = (enum reg_class) rclass_i;
19098 reg_class_t ret = ALL_REGS;
19099 enum insn_code icode;
19100 bool default_p = false;
19101 bool done_p = false;
19103 /* Allow subreg of memory before/during reload. */
19104 bool memory_p = (MEM_P (x)
19105 || (!reload_completed && SUBREG_P (x)
19106 && MEM_P (SUBREG_REG (x))));
19108 sri->icode = CODE_FOR_nothing;
19109 sri->t_icode = CODE_FOR_nothing;
19110 sri->extra_cost = 0;
19111 icode = (in_p
19112 ? reg_addr[mode].reload_load
19113 : reg_addr[mode].reload_store);
19115 if (REG_P (x) || register_operand (x, mode))
19116 {
19117 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
19118 bool altivec_p = (rclass == ALTIVEC_REGS);
19119 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
19121 if (!in_p)
19122 std::swap (to_type, from_type);
19124 /* Can we do a direct move of some sort? */
19125 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
19126 altivec_p))
19127 {
19128 icode = (enum insn_code)sri->icode;
19129 default_p = false;
19130 done_p = true;
19131 ret = NO_REGS;
19132 }
19133 }
19135 /* Make sure 0.0 is not reloaded or forced into memory. */
19136 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
19137 {
19138 ret = NO_REGS;
19139 default_p = false;
19140 done_p = true;
19141 }
19143 /* If this is a scalar floating point value and we want to load it into the
19144 traditional Altivec registers, do it via a move via a traditional floating
19145 point register, unless we have D-form addressing. Also make sure that
19146 non-zero constants use a FPR. */
19147 if (!done_p && reg_addr[mode].scalar_in_vmx_p
19148 && !mode_supports_vmx_dform (mode)
19149 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19150 && (memory_p || CONST_DOUBLE_P (x)))
19151 {
19152 ret = FLOAT_REGS;
19153 default_p = false;
19154 done_p = true;
19155 }
19157 /* Handle reload of load/stores if we have reload helper functions. */
19158 if (!done_p && icode != CODE_FOR_nothing && memory_p)
19159 {
19160 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
19161 mode);
19163 if (extra_cost >= 0)
19164 {
19165 done_p = true;
19166 ret = NO_REGS;
19167 if (extra_cost > 0)
19168 {
19169 sri->extra_cost = extra_cost;
19170 sri->icode = icode;
19171 }
19172 }
19173 }
19175 /* Handle unaligned loads and stores of integer registers. */
19176 if (!done_p && TARGET_POWERPC64
19177 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19178 && memory_p
19179 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
19180 {
19181 rtx addr = XEXP (x, 0);
19182 rtx off = address_offset (addr);
19184 if (off != NULL_RTX)
19185 {
19186 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19187 unsigned HOST_WIDE_INT offset = INTVAL (off);
19189 /* We need a secondary reload when our legitimate_address_p
19190 says the address is good (as otherwise the entire address
19191 will be reloaded), and the offset is not a multiple of
19192 four or we have an address wrap. Address wrap will only
19193 occur for LO_SUMs since legitimate_offset_address_p
19194 rejects addresses for 16-byte mems that will wrap. */
19195 if (GET_CODE (addr) == LO_SUM
19196 ? (1 /* legitimate_address_p allows any offset for lo_sum */
19197 && ((offset & 3) != 0
19198 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
19199 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
19200 && (offset & 3) != 0))
19201 {
19202 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
19203 if (in_p)
19204 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
19205 : CODE_FOR_reload_di_load);
19206 else
19207 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
19208 : CODE_FOR_reload_di_store);
19209 sri->extra_cost = 2;
19210 ret = NO_REGS;
19211 done_p = true;
19212 }
19213 else
19214 default_p = true;
19215 }
19216 else
19217 default_p = true;
19218 }
19220 if (!done_p && !TARGET_POWERPC64
19221 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19222 && memory_p
19223 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
19224 {
19225 rtx addr = XEXP (x, 0);
19226 rtx off = address_offset (addr);
19228 if (off != NULL_RTX)
19229 {
19230 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19231 unsigned HOST_WIDE_INT offset = INTVAL (off);
19233 /* We need a secondary reload when our legitimate_address_p
19234 says the address is good (as otherwise the entire address
19235 will be reloaded), and we have a wrap.
19237 legitimate_lo_sum_address_p allows LO_SUM addresses to
19238 have any offset so test for wrap in the low 16 bits.
19240 legitimate_offset_address_p checks for the range
19241 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
19242 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
19243 [0x7ff4,0x7fff] respectively, so test for the
19244 intersection of these ranges, [0x7ffc,0x7fff] and
19245 [0x7ff4,0x7ff7] respectively.
19247 Note that the address we see here may have been
19248 manipulated by legitimize_reload_address. */
19249 if (GET_CODE (addr) == LO_SUM
19250 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
19251 : offset - (0x8000 - extra) < UNITS_PER_WORD)
19252 {
19253 if (in_p)
19254 sri->icode = CODE_FOR_reload_si_load;
19255 else
19256 sri->icode = CODE_FOR_reload_si_store;
19257 sri->extra_cost = 2;
19258 ret = NO_REGS;
19259 done_p = true;
19260 }
19261 else
19262 default_p = true;
19263 }
19264 else
19265 default_p = true;
19266 }
19268 if (!done_p)
19269 default_p = true;
19271 if (default_p)
19272 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
19274 gcc_assert (ret != ALL_REGS);
19276 if (TARGET_DEBUG_ADDR)
19277 {
19278 fprintf (stderr,
19279 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
19280 "mode = %s",
19281 reg_class_names[ret],
19282 in_p ? "true" : "false",
19283 reg_class_names[rclass],
19284 GET_MODE_NAME (mode));
19286 if (reload_completed)
19287 fputs (", after reload", stderr);
19289 if (!done_p)
19290 fputs (", done_p not set", stderr);
19292 if (default_p)
19293 fputs (", default secondary reload", stderr);
19295 if (sri->icode != CODE_FOR_nothing)
19296 fprintf (stderr, ", reload func = %s, extra cost = %d",
19297 insn_data[sri->icode].name, sri->extra_cost);
19299 else if (sri->extra_cost > 0)
19300 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
19302 fputs ("\n", stderr);
19303 debug_rtx (x);
19304 }
19306 return ret;
19307 }
19309 /* Better tracing for rs6000_secondary_reload_inner. */
19311 static void
19312 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
19313 bool store_p)
19314 {
19315 rtx set, clobber;
19317 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
19319 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
19320 store_p ? "store" : "load");
19322 if (store_p)
19323 set = gen_rtx_SET (mem, reg);
19324 else
19325 set = gen_rtx_SET (reg, mem);
19327 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
19328 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
19329 }
19331 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
19332 ATTRIBUTE_NORETURN;
19334 static void
19335 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
19336 bool store_p)
19337 {
19338 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
19339 gcc_unreachable ();
19340 }
19342 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
19343 reload helper functions. These were identified in
19344 rs6000_secondary_reload_memory, and if reload decided to use the secondary
19345 reload, it calls the insns:
19346 reload_<RELOAD:mode>_<P:mptrsize>_store
19347 reload_<RELOAD:mode>_<P:mptrsize>_load
19349 which in turn calls this function, to do whatever is necessary to create
19350 valid addresses. */
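/* Example (added for exposition; not part of the original source): if a
   GPR load uses the address (plus (plus (reg sp) (const_int 32768))
   (const_int 8)), which no d-form encoding accepts, the PLUS case below
   copies the address into SCRATCH with a SET and the MEM is rewritten to
   (mem (reg SCRATCH)) before the final move is emitted. */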
19352 void
19353 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
19354 {
19355 int regno = true_regnum (reg);
19356 machine_mode mode = GET_MODE (reg);
19357 addr_mask_type addr_mask;
19358 rtx addr;
19359 rtx new_addr;
19360 rtx op_reg, op0, op1;
19361 rtx and_op;
19362 rtx cc_clobber;
19363 rtvec rv;
19365 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
19366 || !base_reg_operand (scratch, GET_MODE (scratch)))
19367 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19369 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
19370 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19372 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
19373 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19375 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
19376 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19378 else
19379 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19381 /* Make sure the mode is valid in this register class. */
19382 if ((addr_mask & RELOAD_REG_VALID) == 0)
19383 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19385 if (TARGET_DEBUG_ADDR)
19386 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
19388 new_addr = addr = XEXP (mem, 0);
19389 switch (GET_CODE (addr))
19390 {
19391 /* Does the register class support auto update forms for this mode? If
19392 not, do the update now. We don't need a scratch register, since the
19393 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
19394 case PRE_INC:
19395 case PRE_DEC:
19396 op_reg = XEXP (addr, 0);
19397 if (!base_reg_operand (op_reg, Pmode))
19398 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19400 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19401 {
19402 int delta = GET_MODE_SIZE (mode);
19403 if (GET_CODE (addr) == PRE_DEC)
19404 delta = -delta;
19405 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
19406 new_addr = op_reg;
19407 }
19408 break;
19410 case PRE_MODIFY:
19411 op0 = XEXP (addr, 0);
19412 op1 = XEXP (addr, 1);
19413 if (!base_reg_operand (op0, Pmode)
19414 || GET_CODE (op1) != PLUS
19415 || !rtx_equal_p (op0, XEXP (op1, 0)))
19416 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19418 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19419 {
19420 emit_insn (gen_rtx_SET (op0, op1));
19421 new_addr = op0;
19422 }
19423 break;
19425 /* Do we need to simulate AND -16 to clear the bottom address bits used
19426 in VMX load/stores? */
19427 case AND:
19428 op0 = XEXP (addr, 0);
19429 op1 = XEXP (addr, 1);
19430 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
19431 {
19432 if (REG_P (op0) || SUBREG_P (op0))
19433 op_reg = op0;
19435 else if (GET_CODE (op1) == PLUS)
19436 {
19437 emit_insn (gen_rtx_SET (scratch, op1));
19438 op_reg = scratch;
19439 }
19441 else
19442 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19444 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
19445 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
19446 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
19447 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
19448 new_addr = scratch;
19449 }
19450 break;
19452 /* If this is an indirect address, make sure it is a base register. */
19453 case REG:
19454 case SUBREG:
19455 if (!base_reg_operand (addr, GET_MODE (addr)))
19456 {
19457 emit_insn (gen_rtx_SET (scratch, addr));
19458 new_addr = scratch;
19459 }
19460 break;
19462 /* If this is an indexed address, make sure the register class can handle
19463 indexed addresses for this mode. */
19464 case PLUS:
19465 op0 = XEXP (addr, 0);
19466 op1 = XEXP (addr, 1);
19467 if (!base_reg_operand (op0, Pmode))
19468 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19470 else if (int_reg_operand (op1, Pmode))
19471 {
19472 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19473 {
19474 emit_insn (gen_rtx_SET (scratch, addr));
19475 new_addr = scratch;
19476 }
19477 }
19479 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
19480 {
19481 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
19482 || !quad_address_p (addr, mode, false))
19483 {
19484 emit_insn (gen_rtx_SET (scratch, addr));
19485 new_addr = scratch;
19486 }
19487 }
19489 /* Make sure the register class can handle offset addresses. */
19490 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19491 {
19492 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19493 {
19494 emit_insn (gen_rtx_SET (scratch, addr));
19495 new_addr = scratch;
19496 }
19497 }
19499 else
19500 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19501 break;
19504 case LO_SUM:
19505 op0 = XEXP (addr, 0);
19506 op1 = XEXP (addr, 1);
19507 if (!base_reg_operand (op0, Pmode))
19508 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19510 else if (int_reg_operand (op1, Pmode))
19511 {
19512 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19513 {
19514 emit_insn (gen_rtx_SET (scratch, addr));
19515 new_addr = scratch;
19516 }
19517 }
19519 /* Quad offsets are restricted and can't handle normal addresses. */
19520 else if (mode_supports_dq_form (mode))
19521 {
19522 emit_insn (gen_rtx_SET (scratch, addr));
19523 new_addr = scratch;
19524 }
19526 /* Make sure the register class can handle offset addresses. */
19527 else if (legitimate_lo_sum_address_p (mode, addr, false))
19528 {
19529 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19530 {
19531 emit_insn (gen_rtx_SET (scratch, addr));
19532 new_addr = scratch;
19533 }
19534 }
19536 else
19537 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19538 break;
19541 case CONST:
19542 case SYMBOL_REF:
19543 case LABEL_REF:
19544 rs6000_emit_move (scratch, addr, Pmode);
19545 new_addr = scratch;
19546 break;
19548 default:
19549 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19550 }
19552 /* Adjust the address if it changed. */
19553 if (addr != new_addr)
19554 {
19555 mem = replace_equiv_address_nv (mem, new_addr);
19556 if (TARGET_DEBUG_ADDR)
19557 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
19558 }
19560 /* Now create the move. */
19561 if (store_p)
19562 emit_insn (gen_rtx_SET (mem, reg));
19563 else
19564 emit_insn (gen_rtx_SET (reg, mem));
19566 return;
19567 }
19569 /* Convert reloads involving 64-bit gprs and misaligned offset
19570 addressing, or multiple 32-bit gprs and offsets that are too large,
19571 to use indirect addressing. */
19573 void
19574 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
19575 {
19576 int regno = true_regnum (reg);
19577 enum reg_class rclass;
19578 rtx addr;
19579 rtx scratch_or_premodify = scratch;
19581 if (TARGET_DEBUG_ADDR)
19582 {
19583 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
19584 store_p ? "store" : "load");
19585 fprintf (stderr, "reg:\n");
19587 fprintf (stderr, "mem:\n");
19589 fprintf (stderr, "scratch:\n");
19590 debug_rtx (scratch);
19593 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
19594 gcc_assert (MEM_P (mem));
19595 rclass = REGNO_REG_CLASS (regno);
19596 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
19597 addr = XEXP (mem, 0);
19599 if (GET_CODE (addr) == PRE_MODIFY)
19600 {
19601 gcc_assert (REG_P (XEXP (addr, 0))
19602 && GET_CODE (XEXP (addr, 1)) == PLUS
19603 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
19604 scratch_or_premodify = XEXP (addr, 0);
19605 addr = XEXP (addr, 1);
19606 }
19607 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
19609 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
19611 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
19613 /* Now create the move. */
19614 if (store_p)
19615 emit_insn (gen_rtx_SET (mem, reg));
19616 else
19617 emit_insn (gen_rtx_SET (reg, mem));
19619 return;
19620 }
19622 /* Given an rtx X being reloaded into a reg required to be
19623 in class CLASS, return the class of reg to actually use.
19624 In general this is just CLASS; but on some machines
19625 in some cases it is preferable to use a more restrictive class.
19627 On the RS/6000, we have to return NO_REGS when we want to reload a
19628 floating-point CONST_DOUBLE to force it to be copied to memory.
19630 We also don't want to reload integer values into floating-point
19631 registers if we can at all help it. In fact, this can
19632 cause reload to die, if it tries to generate a reload of CTR
19633 into a FP register and discovers it doesn't have the memory location required.
19636 ??? Would it be a good idea to have reload do the converse, that is
19637 try to reload floating modes into FP registers if possible?
19640 static enum reg_class
19641 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
19643 machine_mode mode = GET_MODE (x);
19644 bool is_constant = CONSTANT_P (x);
19646 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
19647 reload class for it. */
19648 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19649 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
19652 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
19653 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
19656 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
19657 the reloading of address expressions using PLUS into floating point registers. */
19659 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
19663 /* Zero is always allowed in all VSX registers. */
19664 if (x == CONST0_RTX (mode))
19667 /* If this is a vector constant that can be formed with a few Altivec
19668 instructions, we want altivec registers. */
19669 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
19670 return ALTIVEC_REGS;
19672 /* If this is an integer constant that can easily be loaded into
19673 vector registers, allow it. */
19674 if (CONST_INT_P (x))
19676 HOST_WIDE_INT value = INTVAL (x);
19678 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
19679 2.06 can generate it in the Altivec registers with VSPLTISW. */
19683 if (TARGET_P8_VECTOR)
19685 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19686 return ALTIVEC_REGS;
19691 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
19692 a sign extend in the Altivec registers. */
19693 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
19694 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
19695 return ALTIVEC_REGS;
19698 /* Force constant to memory. */
19702 /* D-form addressing can easily reload the value. */
19703 if (mode_supports_vmx_dform (mode)
19704 || mode_supports_dq_form (mode))
19707 /* If this is a scalar floating point value and we don't have D-form
19708 addressing, prefer the traditional floating point registers so that we
19709 can use D-form (register+offset) addressing. */
19710 if (rclass == VSX_REGS
19711 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
19714 /* Prefer the Altivec registers if Altivec is handling the vector
19715 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec loads. */
19717 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
19718 || mode == V1TImode)
19719 return ALTIVEC_REGS;
19724 if (is_constant || GET_CODE (x) == PLUS)
19726 if (reg_class_subset_p (GENERAL_REGS, rclass))
19727 return GENERAL_REGS;
19728 if (reg_class_subset_p (BASE_REGS, rclass))
19733 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
19734 return GENERAL_REGS;
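/* To make the constant handling above concrete: reloading (const_int -1)
   on an ISA 2.07 (power8) target keeps the requested VSX class, since
   XXLORC can materialize -1 in any VSX register, while reloading a small
   constant such as (const_int 100) on an ISA 3.0 (power9) target prefers
   ALTIVEC_REGS, so that XXSPLTIB plus a sign extend can synthesize the
   value without a constant pool load.  */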
19739 /* Debug version of rs6000_preferred_reload_class. */
19740 static enum reg_class
19741 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
19743 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
19746 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
19748 reg_class_names[ret], reg_class_names[rclass],
19749 GET_MODE_NAME (GET_MODE (x)));
19755 /* If we are copying between FP or AltiVec registers and anything else, we need
19756 a memory location. The exception is when we are targeting ppc64 and the
19757 move to/from fpr to gpr instructions are available. Also, under VSX, you
19758 can copy vector registers from the FP register set to the Altivec register
19759 set and vice versa. */
19762 rs6000_secondary_memory_needed (machine_mode mode,
19763 reg_class_t from_class,
19764 reg_class_t to_class)
19766 enum rs6000_reg_type from_type, to_type;
19767 bool altivec_p = ((from_class == ALTIVEC_REGS)
19768 || (to_class == ALTIVEC_REGS));
19770 /* If a simple/direct move is available, we don't need secondary memory. */
19771 from_type = reg_class_to_reg_type[(int)from_class];
19772 to_type = reg_class_to_reg_type[(int)to_class];
19774 if (rs6000_secondary_reload_move (to_type, from_type, mode,
19775 (secondary_reload_info *)0, altivec_p))
19778 /* If we have a floating point or vector register class, we need to use
19779 memory to transfer the data. */
19780 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
19786 /* Debug version of rs6000_secondary_memory_needed. */
19788 rs6000_debug_secondary_memory_needed (machine_mode mode,
19789 reg_class_t from_class,
19790 reg_class_t to_class)
19792 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
19795 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
19796 "to_class = %s, mode = %s\n",
19797 ret ? "true" : "false",
19798 reg_class_names[from_class],
19799 reg_class_names[to_class],
19800 GET_MODE_NAME (mode));
19805 /* Return the register class of a scratch register needed to copy IN into
19806 or out of a register in RCLASS in MODE. If it can be done directly,
19807 NO_REGS is returned. */
19809 static enum reg_class
19810 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
19815 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
19817 && MACHOPIC_INDIRECT
19821 /* We cannot copy a symbolic operand directly into anything
19822 other than BASE_REGS for TARGET_ELF. So indicate that a
19823 register from BASE_REGS is needed as an intermediate register.
19826 On Darwin, pic addresses require a load from memory, which
19827 needs a base register. */
19828 if (rclass != BASE_REGS
19829 && (SYMBOL_REF_P (in)
19830 || GET_CODE (in) == HIGH
19831 || GET_CODE (in) == LABEL_REF
19832 || GET_CODE (in) == CONST))
19838 regno = REGNO (in);
19839 if (!HARD_REGISTER_NUM_P (regno))
19841 regno = true_regnum (in);
19842 if (!HARD_REGISTER_NUM_P (regno))
19846 else if (SUBREG_P (in))
19848 regno = true_regnum (in);
19849 if (!HARD_REGISTER_NUM_P (regno))
19855 /* If we have VSX register moves, prefer moving scalar values between
19856 Altivec registers and GPR by going via an FPR (and then via memory)
19857 instead of reloading the secondary memory address for Altivec moves. */
19859 && GET_MODE_SIZE (mode) < 16
19860 && !mode_supports_vmx_dform (mode)
19861 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
19862 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
19863 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19864 && (regno >= 0 && INT_REGNO_P (regno)))))
19867 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS into anything. */
19869 if (rclass == GENERAL_REGS || rclass == BASE_REGS
19870 || (regno >= 0 && INT_REGNO_P (regno)))
19873 /* Constants, memory, and VSX registers can go into VSX registers (both the
19874 traditional floating point and the altivec registers). */
19875 if (rclass == VSX_REGS
19876 && (regno == -1 || VSX_REGNO_P (regno)))
19879 /* Constants, memory, and FP registers can go into FP registers. */
19880 if ((regno == -1 || FP_REGNO_P (regno))
19881 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
19882 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
19884 /* Memory, and AltiVec registers can go into AltiVec registers. */
19885 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
19886 && rclass == ALTIVEC_REGS)
19889 /* We can copy among the CR registers. */
19890 if ((rclass == CR_REGS || rclass == CR0_REGS)
19891 && regno >= 0 && CR_REGNO_P (regno))
19894 /* Otherwise, we need GENERAL_REGS. */
19895 return GENERAL_REGS;
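/* For instance, copying a SYMBOL_REF into a floating point register on an
   ELF target comes back as BASE_REGS above: the address must first be
   formed in a base register and the value then loaded from memory.
   Straightforward copies (GPR to GPR, VSX to VSX, CR field to CR field)
   come back as NO_REGS, meaning no intermediate register is required.  */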
19898 /* Debug version of rs6000_secondary_reload_class. */
19899 static enum reg_class
19900 rs6000_debug_secondary_reload_class (enum reg_class rclass,
19901 machine_mode mode, rtx in)
19903 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
19905 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
19906 "mode = %s, input rtx:\n",
19907 reg_class_names[ret], reg_class_names[rclass],
19908 GET_MODE_NAME (mode));
19914 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
19917 rs6000_can_change_mode_class (machine_mode from,
19919 reg_class_t rclass)
19921 unsigned from_size = GET_MODE_SIZE (from);
19922 unsigned to_size = GET_MODE_SIZE (to);
19924 if (from_size != to_size)
19926 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
19928 if (reg_classes_intersect_p (xclass, rclass))
19930 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
19931 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
19932 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
19933 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
19935 /* Don't allow 64-bit types to overlap with 128-bit types that take a
19936 single register under VSX because the scalar part of the register
19937 is in the upper 64-bits, and not the lower 64-bits. Types like
19938 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
19939 IEEE floating point can't overlap, and neither can small types. */
19942 if (to_float128_vector_p && from_float128_vector_p)
19945 else if (to_float128_vector_p || from_float128_vector_p)
19948 /* TDmode in floating-mode registers must always go into a register
19949 pair with the most significant word in the even-numbered register
19950 to match ISA requirements. In little-endian mode, this does not
19951 match subreg numbering, so we cannot allow subregs. */
19952 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
19955 if (from_size < 8 || to_size < 8)
19958 if (from_size == 8 && (8 * to_nregs) != to_size)
19961 if (to_size == 8 && (8 * from_nregs) != from_size)
19970 /* Since the VSX register set includes traditional floating point registers
19971 and altivec registers, just check for the size being different instead of
19972 trying to check whether the modes are vector modes. Otherwise it won't
19973 allow say DF and DI to change classes. For types like TFmode and TDmode
19974 that take 2 64-bit registers, rather than a single 128-bit register, don't
19975 allow subregs of those types to other 128 bit types. */
19976 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
19978 unsigned num_regs = (from_size + 15) / 16;
19979 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
19980 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
19983 return (from_size == 8 || from_size == 16);
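/* Putting the size checks above together: under VSX a subreg between
   DFmode and DImode (both 8 bytes) is allowed, but a subreg between
   DImode and a 16-byte mode such as V2DImode is rejected, because the
   8-byte scalar lives in the upper half of the VSX register rather than
   the half that subreg numbering would select.  */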
19986 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
19987 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
19993 /* Debug version of rs6000_can_change_mode_class. */
19995 rs6000_debug_can_change_mode_class (machine_mode from,
19997 reg_class_t rclass)
19999 bool ret = rs6000_can_change_mode_class (from, to, rclass);
20002 "rs6000_can_change_mode_class, return %s, from = %s, "
20003 "to = %s, rclass = %s\n",
20004 ret ? "true" : "false",
20005 GET_MODE_NAME (from), GET_MODE_NAME (to),
20006 reg_class_names[rclass]);
20011 /* Return a string to do a move operation of 128 bits of data. */
20014 rs6000_output_move_128bit (rtx operands[])
20016 rtx dest = operands[0];
20017 rtx src = operands[1];
20018 machine_mode mode = GET_MODE (dest);
20021 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20022 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
20026 dest_regno = REGNO (dest);
20027 dest_gpr_p = INT_REGNO_P (dest_regno);
20028 dest_fp_p = FP_REGNO_P (dest_regno);
20029 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20030 dest_vsx_p = dest_fp_p | dest_vmx_p;
20035 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
20040 src_regno = REGNO (src);
20041 src_gpr_p = INT_REGNO_P (src_regno);
20042 src_fp_p = FP_REGNO_P (src_regno);
20043 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20044 src_vsx_p = src_fp_p | src_vmx_p;
20049 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
20052 /* Register moves. */
20053 if (dest_regno >= 0 && src_regno >= 0)
20060 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
20061 return (WORDS_BIG_ENDIAN
20062 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
20063 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
20065 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
20069 else if (TARGET_VSX && dest_vsx_p)
20072 return "xxlor %x0,%x1,%x1";
20074 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
20075 return (WORDS_BIG_ENDIAN
20076 ? "mtvsrdd %x0,%1,%L1"
20077 : "mtvsrdd %x0,%L1,%1");
20079 else if (TARGET_DIRECT_MOVE && src_gpr_p)
20083 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
20084 return "vor %0,%1,%1";
20086 else if (dest_fp_p && src_fp_p)
20091 else if (dest_regno >= 0 && MEM_P (src))
20095 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20101 else if (TARGET_ALTIVEC && dest_vmx_p
20102 && altivec_indexed_or_indirect_operand (src, mode))
20103 return "lvx %0,%y1";
20105 else if (TARGET_VSX && dest_vsx_p)
20107 if (mode_supports_dq_form (mode)
20108 && quad_address_p (XEXP (src, 0), mode, true))
20109 return "lxv %x0,%1";
20111 else if (TARGET_P9_VECTOR)
20112 return "lxvx %x0,%y1";
20114 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20115 return "lxvw4x %x0,%y1";
20118 return "lxvd2x %x0,%y1";
20121 else if (TARGET_ALTIVEC && dest_vmx_p)
20122 return "lvx %0,%y1";
20124 else if (dest_fp_p)
20129 else if (src_regno >= 0 && MEM_P (dest))
20133 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20134 return "stq %1,%0";
20139 else if (TARGET_ALTIVEC && src_vmx_p
20140 && altivec_indexed_or_indirect_operand (dest, mode))
20141 return "stvx %1,%y0";
20143 else if (TARGET_VSX && src_vsx_p)
20145 if (mode_supports_dq_form (mode)
20146 && quad_address_p (XEXP (dest, 0), mode, true))
20147 return "stxv %x1,%0";
20149 else if (TARGET_P9_VECTOR)
20150 return "stxvx %x1,%y0";
20152 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20153 return "stxvw4x %x1,%y0";
20156 return "stxvd2x %x1,%y0";
20159 else if (TARGET_ALTIVEC && src_vmx_p)
20160 return "stvx %1,%y0";
20167 else if (dest_regno >= 0
20168 && (CONST_INT_P (src)
20169 || CONST_WIDE_INT_P (src)
20170 || CONST_DOUBLE_P (src)
20171 || GET_CODE (src) == CONST_VECTOR))
20176 else if ((dest_vmx_p && TARGET_ALTIVEC)
20177 || (dest_vsx_p && TARGET_VSX))
20178 return output_vec_const_move (operands);
20181 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
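/* Sample outputs of the selection above: a V2DImode move between two VSX
   registers yields "xxlor %x0,%x1,%x1"; with ISA 3.0 direct moves, a GPR
   pair moved into a VSX register yields "mtvsrdd %x0,%1,%L1" (big-endian
   operand order); and a DQ-form vector load yields "lxv %x0,%1".  */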
20184 /* Validate a 128-bit move. */
20186 rs6000_move_128bit_ok_p (rtx operands[])
20188 machine_mode mode = GET_MODE (operands[0]);
20189 return (gpc_reg_operand (operands[0], mode)
20190 || gpc_reg_operand (operands[1], mode));
20193 /* Return true if a 128-bit move needs to be split. */
20195 rs6000_split_128bit_ok_p (rtx operands[])
20197 if (!reload_completed)
20200 if (!gpr_or_gpr_p (operands[0], operands[1]))
20203 if (quad_load_store_p (operands[0], operands[1]))
20210 /* Given a comparison operation, return the bit number in CCR to test. We
20211 know this is a valid comparison.
20213 SCC_P is 1 if this is for an scc. That means that %D will have been
20214 used instead of %C, so the bits will be in different places.
20216 Return -1 if OP isn't a valid comparison for some reason. */
20219 ccr_bit (rtx op, int scc_p)
20221 enum rtx_code code = GET_CODE (op);
20222 machine_mode cc_mode;
20227 if (!COMPARISON_P (op))
20230 reg = XEXP (op, 0);
20232 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
20235 cc_mode = GET_MODE (reg);
20236 cc_regnum = REGNO (reg);
20237 base_bit = 4 * (cc_regnum - CR0_REGNO);
20239 validate_condition_mode (code, cc_mode);
20241 /* When generating a sCOND operation, only positive conditions are allowed. */
20260 return scc_p ? base_bit + 3 : base_bit + 2;
20262 return base_bit + 2;
20263 case GT: case GTU: case UNLE:
20264 return base_bit + 1;
20265 case LT: case LTU: case UNGE:
20267 case ORDERED: case UNORDERED:
20268 return base_bit + 3;
20271 /* If scc, we will have done a cror to put the bit in the
20272 unordered position. So test that bit. For integer, this is ! LT
20273 unless this is an scc insn. */
20274 return scc_p ? base_bit + 3 : base_bit;
20277 return scc_p ? base_bit + 3 : base_bit + 1;
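/* Worked example: each CR field holds four bits in the order LT, GT, EQ,
   SO/UN.  A GT test against CR field 2 therefore gives base_bit = 4 * 2
   = 8 and returns bit 9; with SCC_P set, codes whose result was cror'd
   into the SO/UN position return base_bit + 3 instead.  */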
20284 /* Return the GOT register. */
20287 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
20289 /* The second flow pass currently (June 1999) can't update
20290 regs_ever_live without disturbing other parts of the compiler, so
20291 update it here to make the prolog/epilogue code happy. */
20292 if (!can_create_pseudo_p ()
20293 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20294 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
20296 crtl->uses_pic_offset_table = 1;
20298 return pic_offset_table_rtx;
20301 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
20303 /* Write out a function code label. */
20306 rs6000_output_function_entry (FILE *file, const char *fname)
20308 if (fname[0] != '.')
20310 switch (DEFAULT_ABI)
20313 gcc_unreachable ();
20319 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
20329 RS6000_OUTPUT_BASENAME (file, fname);
20332 /* Print an operand. Recognize special options, documented below. */
20335 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
20336 only introduced by the linker, when applying the sda21 relocation. */
20338 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
20339 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
20341 #define SMALL_DATA_RELOC "sda21"
20342 #define SMALL_DATA_REG 0
20346 print_operand (FILE *file, rtx x, int code)
20349 unsigned HOST_WIDE_INT uval;
20353 /* %a is output_address. */
20355 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise output_operand. */
20359 /* Like 'J' but get to the GT bit only. */
20360 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20362 output_operand_lossage ("invalid %%D value");
20366 /* Bit 1 is GT bit. */
20367 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
20369 /* Add one for shift count in rlinm for scc. */
20370 fprintf (file, "%d", i + 1);
20374 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
20377 output_operand_lossage ("invalid %%e value");
20382 if ((uval & 0xffff) == 0 && uval != 0)
20387 /* X is a CR register. Print the number of the EQ bit of the CR */
20388 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20389 output_operand_lossage ("invalid %%E value");
20391 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
20395 /* X is a CR register. Print the shift count needed to move it
20396 to the high-order four bits. */
20397 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20398 output_operand_lossage ("invalid %%f value");
20400 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
20404 /* Similar, but print the count for the rotate in the opposite
20406 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20407 output_operand_lossage ("invalid %%F value");
20409 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
20413 /* X is a constant integer. If it is negative, print "m",
20414 otherwise print "z". This is to make an aze or ame insn. */
20415 if (!CONST_INT_P (x))
20416 output_operand_lossage ("invalid %%G value");
20417 else if (INTVAL (x) >= 0)
20424 /* If constant, output low-order five bits. Otherwise, write normally. */
20427 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
20429 print_operand (file, x, 0);
20433 /* If constant, output low-order six bits. Otherwise, write normally. */
20436 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
20438 print_operand (file, x, 0);
20442 /* Print `i' if this is a constant, else nothing. */
20448 /* Write the bit number in CCR for jump. */
20449 i = ccr_bit (x, 0);
20451 output_operand_lossage ("invalid %%j code");
20453 fprintf (file, "%d", i);
20457 /* Similar, but add one for shift count in rlinm for scc and pass
20458 scc flag to `ccr_bit'. */
20459 i = ccr_bit (x, 1);
20461 output_operand_lossage ("invalid %%J code");
20463 /* If we want bit 31, write a shift count of zero, not 32. */
20464 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20468 /* X must be a constant. Write the 1's complement of the constant. */
20471 output_operand_lossage ("invalid %%k value");
20473 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
20477 /* X must be a symbolic constant on ELF. Write an
20478 expression suitable for an 'addi' that adds in the low 16
20479 bits of the MEM. */
20480 if (GET_CODE (x) == CONST)
20482 if (GET_CODE (XEXP (x, 0)) != PLUS
20483 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
20484 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
20485 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20486 output_operand_lossage ("invalid %%K value");
20488 print_operand_address (file, x);
20489 fputs ("@l", file);
20492 /* %l is output_asm_label. */
20495 /* Write second word of DImode or DFmode reference. Works on register
20496 or non-indexed memory only. */
20498 fputs (reg_names[REGNO (x) + 1], file);
20499 else if (MEM_P (x))
20501 machine_mode mode = GET_MODE (x);
20502 /* Handle possible auto-increment. Since it is pre-increment and
20503 we have already done it, we can just use an offset of word. */
20504 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20505 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20506 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20508 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20509 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20512 output_address (mode, XEXP (adjust_address_nv (x, SImode,
20516 if (small_data_operand (x, GET_MODE (x)))
20517 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20518 reg_names[SMALL_DATA_REG]);
20522 case 'N': /* Unused */
20523 /* Write the number of elements in the vector times 4. */
20524 if (GET_CODE (x) != PARALLEL)
20525 output_operand_lossage ("invalid %%N value");
20527 fprintf (file, "%d", XVECLEN (x, 0) * 4);
20530 case 'O': /* Unused */
20531 /* Similar, but subtract 1 first. */
20532 if (GET_CODE (x) != PARALLEL)
20533 output_operand_lossage ("invalid %%O value");
20535 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
20539 /* X is a CONST_INT that is a power of two. Output the logarithm. */
20542 || (i = exact_log2 (INTVAL (x))) < 0)
20543 output_operand_lossage ("invalid %%p value");
20545 fprintf (file, "%d", i);
20549 /* The operand must be an indirect memory reference. The result
20550 is the register name. */
20551 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
20552 || REGNO (XEXP (x, 0)) >= 32)
20553 output_operand_lossage ("invalid %%P value");
20555 fputs (reg_names[REGNO (XEXP (x, 0))], file);
20559 /* This outputs the logical code corresponding to a boolean
20560 expression. The expression may have one or both operands
20561 negated (if one, only the first one). For condition register
20562 logical operations, it will also treat the negated
20563 CR codes as NOTs, but not handle NOTs of them. */
20565 const char *const *t = 0;
20567 enum rtx_code code = GET_CODE (x);
20568 static const char * const tbl[3][3] = {
20569 { "and", "andc", "nor" },
20570 { "or", "orc", "nand" },
20571 { "xor", "eqv", "xor" } };
20575 else if (code == IOR)
20577 else if (code == XOR)
20580 output_operand_lossage ("invalid %%q value");
20582 if (GET_CODE (XEXP (x, 0)) != NOT)
20586 if (GET_CODE (XEXP (x, 1)) == NOT)
20597 if (! TARGET_MFCRF)
20603 /* X is a CR register. Print the mask for `mtcrf'. */
20604 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20605 output_operand_lossage ("invalid %%R value");
20607 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
20611 /* Low 5 bits of 32 - value */
20613 output_operand_lossage ("invalid %%s value");
20615 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
20619 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
20620 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20622 output_operand_lossage ("invalid %%t value");
20626 /* Bit 3 is OV bit. */
20627 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
20629 /* If we want bit 31, write a shift count of zero, not 32. */
20630 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20634 /* Print the symbolic name of a branch target register. */
20635 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20636 x = XVECEXP (x, 0, 0);
20637 if (!REG_P (x) || (REGNO (x) != LR_REGNO
20638 && REGNO (x) != CTR_REGNO))
20639 output_operand_lossage ("invalid %%T value");
20640 else if (REGNO (x) == LR_REGNO)
20641 fputs ("lr", file);
20643 fputs ("ctr", file);
20647 /* High-order or low-order 16 bits of constant, whichever is non-zero,
20648 for use in unsigned operand. */
20651 output_operand_lossage ("invalid %%u value");
20656 if ((uval & 0xffff) == 0)
20659 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
20663 /* High-order 16 bits of constant for use in signed operand. */
20665 output_operand_lossage ("invalid %%v value");
20667 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
20668 (INTVAL (x) >> 16) & 0xffff);
20672 /* Print `u' if this has an auto-increment or auto-decrement. */
20674 && (GET_CODE (XEXP (x, 0)) == PRE_INC
20675 || GET_CODE (XEXP (x, 0)) == PRE_DEC
20676 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
20681 /* Print the trap code for this operand. */
20682 switch (GET_CODE (x))
20685 fputs ("eq", file); /* 4 */
20688 fputs ("ne", file); /* 24 */
20691 fputs ("lt", file); /* 16 */
20694 fputs ("le", file); /* 20 */
20697 fputs ("gt", file); /* 8 */
20700 fputs ("ge", file); /* 12 */
20703 fputs ("llt", file); /* 2 */
20706 fputs ("lle", file); /* 6 */
20709 fputs ("lgt", file); /* 1 */
20712 fputs ("lge", file); /* 5 */
20715 output_operand_lossage ("invalid %%V value");
20720 /* If constant, low-order 16 bits of constant, signed. Otherwise, write normally. */
20723 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
20724 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
20726 print_operand (file, x, 0);
20730 /* X is a FPR or Altivec register used in a VSX context. */
20731 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
20732 output_operand_lossage ("invalid %%x value");
20735 int reg = REGNO (x);
20736 int vsx_reg = (FP_REGNO_P (reg)
20738 : reg - FIRST_ALTIVEC_REGNO + 32);
20740 #ifdef TARGET_REGNAMES
20741 if (TARGET_REGNAMES)
20742 fprintf (file, "%%vs%d", vsx_reg);
20745 fprintf (file, "%d", vsx_reg);
20751 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
20752 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
20753 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
20758 /* Like 'L', for third word of TImode/PTImode */
20760 fputs (reg_names[REGNO (x) + 2], file);
20761 else if (MEM_P (x))
20763 machine_mode mode = GET_MODE (x);
20764 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20765 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20766 output_address (mode, plus_constant (Pmode,
20767 XEXP (XEXP (x, 0), 0), 8));
20768 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20769 output_address (mode, plus_constant (Pmode,
20770 XEXP (XEXP (x, 0), 0), 8));
20772 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
20773 if (small_data_operand (x, GET_MODE (x)))
20774 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20775 reg_names[SMALL_DATA_REG]);
20780 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20781 x = XVECEXP (x, 0, 1);
20782 /* X is a SYMBOL_REF. Write out the name preceded by a
20783 period and without any trailing data in brackets. Used for function
20784 names. If we are configured for System V (or the embedded ABI) on
20785 the PowerPC, do not emit the period, since those systems do not use
20786 TOCs and the like. */
20787 if (!SYMBOL_REF_P (x))
20789 output_operand_lossage ("invalid %%z value");
20793 /* For macho, check to see if we need a stub. */
20796 const char *name = XSTR (x, 0);
20798 if (darwin_picsymbol_stubs
20799 && MACHOPIC_INDIRECT
20800 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
20801 name = machopic_indirection_name (x, /*stub_p=*/true);
20803 assemble_name (file, name);
20805 else if (!DOT_SYMBOLS)
20806 assemble_name (file, XSTR (x, 0));
20808 rs6000_output_function_entry (file, XSTR (x, 0));
20812 /* Like 'L', for last word of TImode/PTImode. */
20814 fputs (reg_names[REGNO (x) + 3], file);
20815 else if (MEM_P (x))
20817 machine_mode mode = GET_MODE (x);
20818 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20819 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20820 output_address (mode, plus_constant (Pmode,
20821 XEXP (XEXP (x, 0), 0), 12));
20822 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20823 output_address (mode, plus_constant (Pmode,
20824 XEXP (XEXP (x, 0), 0), 12));
20826 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
20827 if (small_data_operand (x, GET_MODE (x)))
20828 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20829 reg_names[SMALL_DATA_REG]);
20833 /* Print AltiVec memory operand. */
20838 gcc_assert (MEM_P (x));
20842 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
20843 && GET_CODE (tmp) == AND
20844 && CONST_INT_P (XEXP (tmp, 1))
20845 && INTVAL (XEXP (tmp, 1)) == -16)
20846 tmp = XEXP (tmp, 0);
20847 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
20848 && GET_CODE (tmp) == PRE_MODIFY)
20849 tmp = XEXP (tmp, 1);
20851 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
20854 if (GET_CODE (tmp) != PLUS
20855 || !REG_P (XEXP (tmp, 0))
20856 || !REG_P (XEXP (tmp, 1)))
20858 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
20862 if (REGNO (XEXP (tmp, 0)) == 0)
20863 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
20864 reg_names[ REGNO (XEXP (tmp, 0)) ]);
20866 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
20867 reg_names[ REGNO (XEXP (tmp, 1)) ]);
20874 fprintf (file, "%s", reg_names[REGNO (x)]);
20875 else if (MEM_P (x))
20877 /* We need to handle PRE_INC and PRE_DEC here, since we need to
20878 know the width from the mode. */
20879 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
20880 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
20881 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20882 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
20883 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
20884 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20885 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20886 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
20888 output_address (GET_MODE (x), XEXP (x, 0));
20890 else if (toc_relative_expr_p (x, false,
20891 &tocrel_base_oac, &tocrel_offset_oac))
20892 /* This hack along with a corresponding hack in
20893 rs6000_output_addr_const_extra arranges to output addends
20894 where the assembler expects to find them. eg.
20895 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
20896 without this hack would be output as "x@toc+4". We want "x+4@toc". */
20898 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
20899 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
20900 output_addr_const (file, XVECEXP (x, 0, 0));
20901 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20902 output_addr_const (file, XVECEXP (x, 0, 1));
20904 output_addr_const (file, x);
20908 if (const char *name = get_some_local_dynamic_name ())
20909 assemble_name (file, name);
20911 output_operand_lossage ("'%%&' used without any "
20912 "local dynamic TLS references");
20916 output_operand_lossage ("invalid %%xn code");
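/* A few sample expansions of the codes above, assuming the operand lives
   in CR field 1 or is the indexed address (reg 9 + reg 10):

     %E  ->  "6"     EQ bit of CR1, 4 * 1 + 2
     %R  ->  "64"    mtcrf field mask for CR1, 128 >> 1
     %y  ->  "9,10"  register pair for an indexed AltiVec/VSX access  */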
20920 /* Print the address of an operand. */
20923 print_operand_address (FILE *file, rtx x)
20926 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
20928 /* Is it a pc-relative address? */
20929 else if (pcrel_address (x, Pmode))
20931 HOST_WIDE_INT offset;
20933 if (GET_CODE (x) == CONST)
20936 if (GET_CODE (x) == PLUS)
20938 offset = INTVAL (XEXP (x, 1));
20944 output_addr_const (file, x);
20947 fprintf (file, "%+" PRId64, offset);
20949 fputs ("@pcrel", file);
20951 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
20952 || GET_CODE (x) == LABEL_REF)
20954 output_addr_const (file, x);
20955 if (small_data_operand (x, GET_MODE (x)))
20956 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20957 reg_names[SMALL_DATA_REG]);
20959 gcc_assert (!TARGET_TOC);
20961 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20962 && REG_P (XEXP (x, 1)))
20964 if (REGNO (XEXP (x, 0)) == 0)
20965 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
20966 reg_names[ REGNO (XEXP (x, 0)) ]);
20968 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
20969 reg_names[ REGNO (XEXP (x, 1)) ]);
20971 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20972 && CONST_INT_P (XEXP (x, 1)))
20973 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
20974 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
20976 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20977 && CONSTANT_P (XEXP (x, 1)))
20979 fprintf (file, "lo16(");
20980 output_addr_const (file, XEXP (x, 1));
20981 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20985 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20986 && CONSTANT_P (XEXP (x, 1)))
20988 output_addr_const (file, XEXP (x, 1));
20989 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20992 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
20994 /* This hack along with a corresponding hack in
20995 rs6000_output_addr_const_extra arranges to output addends
20996 where the assembler expects to find them. eg.
20998 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
20999 without this hack would be output as "x@toc+8@l(9)". We
21000 want "x+8@toc@l(9)". */
21001 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21002 if (GET_CODE (x) == LO_SUM)
21003 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
21005 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
21008 output_addr_const (file, x);
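/* Typical address forms printed above: "0(9)" for a bare register, "9,10"
   for an indexed address, "-16(1)" for register plus constant, "sym@l(9)"
   for an ELF LO_SUM (or "lo16(sym)(9)" on Darwin), and "sym+4@pcrel" for
   a pc-relative reference with an addend.  */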
21011 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
21014 rs6000_output_addr_const_extra (FILE *file, rtx x)
21016 if (GET_CODE (x) == UNSPEC)
21017 switch (XINT (x, 1))
21019 case UNSPEC_TOCREL:
21020 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
21021 && REG_P (XVECEXP (x, 0, 1))
21022 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21023 output_addr_const (file, XVECEXP (x, 0, 0));
21024 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
21026 if (INTVAL (tocrel_offset_oac) >= 0)
21027 fprintf (file, "+");
21028 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
21030 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
21033 assemble_name (file, toc_label_name);
21036 else if (TARGET_ELF)
21037 fputs ("@toc", file);
21041 case UNSPEC_MACHOPIC_OFFSET:
21042 output_addr_const (file, XVECEXP (x, 0, 0));
21044 machopic_output_function_base_name (file);
21051 /* Target hook for assembling integer objects. The PowerPC version has
21052 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21053 is defined. It also needs to handle DI-mode objects on 64-bit hosts. */
21057 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21059 #ifdef RELOCATABLE_NEEDS_FIXUP
21060 /* Special handling for SI values. */
21061 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
21063 static int recurse = 0;
21065 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21066 the .fixup section. Since the TOC section is already relocated, we
21067 don't need to mark it here. We used to skip the text section, but it
21068 should never be valid for relocated addresses to be placed in the text section. */
21070 if (DEFAULT_ABI == ABI_V4
21071 && (TARGET_RELOCATABLE || flag_pic > 1)
21072 && in_section != toc_section
21074 && !CONST_SCALAR_INT_P (x)
21080 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21082 ASM_OUTPUT_LABEL (asm_out_file, buf);
21083 fprintf (asm_out_file, "\t.long\t(");
21084 output_addr_const (asm_out_file, x);
21085 fprintf (asm_out_file, ")@fixup\n");
21086 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21087 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21088 fprintf (asm_out_file, "\t.long\t");
21089 assemble_name (asm_out_file, buf);
21090 fprintf (asm_out_file, "\n\t.previous\n");
21094 /* Remove initial .'s to turn a -mcall-aixdesc function
21095 address into the address of the descriptor, not the function itself. */
21097 else if (SYMBOL_REF_P (x)
21098 && XSTR (x, 0)[0] == '.'
21099 && DEFAULT_ABI == ABI_AIX)
21101 const char *name = XSTR (x, 0);
21102 while (*name == '.')
21105 fprintf (asm_out_file, "\t.long\t%s\n", name);
21109 #endif /* RELOCATABLE_NEEDS_FIXUP */
21110 return default_assemble_integer (x, size, aligned_p);
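/* For -mrelocatable, the path above emits roughly the following for each
   address-sized word, so the startup code can find and relocate it:

       .LCP0:
               .long (sym)@fixup
               .section ".fixup","aw"
               .align 2
               .long .LCP0
               .previous
*/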
21113 /* Return a template string for assembly to emit when making an
21114 external call. FUNOP is the call mem argument operand number. */
21116 static const char *
21117 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
21119 /* -Wformat-overflow workaround, without which gcc thinks that %u
21120 might produce 10 digits. */
21121 gcc_assert (funop <= MAX_RECOG_OPERANDS);
21125 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
21127 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
21128 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
21129 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
21130 sprintf (arg, "(%%&@tlsld)");
21132 gcc_unreachable ();
21135 /* The magic 32768 offset here corresponds to the offset of
21136 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
21138 sprintf (z, "%%z%u%s", funop,
21139 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
21142 static char str[32]; /* 1 spare */
21143 if (rs6000_pcrel_p (cfun))
21144 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
21145 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21146 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
21147 sibcall ? "" : "\n\tnop");
21148 else if (DEFAULT_ABI == ABI_V4)
21149 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
21150 flag_pic ? "@plt" : "");
21152 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
21153 else if (DEFAULT_ABI == ABI_DARWIN)
21155 /* The cookie is in operand func+2. */
21156 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
21157 int cookie = INTVAL (operands[funop + 2]);
21158 if (cookie & CALL_LONG)
21160 tree funname = get_identifier (XSTR (operands[funop], 0));
21161 tree labelname = get_prev_label (funname);
21162 gcc_checking_assert (labelname && !sibcall);
21164 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
21165 instruction will reach 'foo', otherwise link as 'bl L42'".
21166 "L42" should be a 'branch island', that will do a far jump to
21167 'foo'. Branch islands are generated in
21168 macho_branch_islands(). */
21169 sprintf (str, "jbsr %%z%u,%.10s", funop,
21170 IDENTIFIER_POINTER (labelname));
21173 /* Same as AIX or ELFv2, except to keep backwards compat, no nop after the call. */
21175 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
21179 gcc_unreachable ();
21184 rs6000_call_template (rtx *operands, unsigned int funop)
21186 return rs6000_call_template_1 (operands, funop, false);
21190 rs6000_sibcall_template (rtx *operands, unsigned int funop)
21192 return rs6000_call_template_1 (operands, funop, true);
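/* Sample templates produced above: "bl %z1@notoc" for pc-relative code,
   "bl %z1\n\tnop" for AIX/ELFv2 (the nop is the TOC restore slot patched
   by the linker), and "bl %z1@plt" for SysV PIC; secure-plt PIC adds a
   +32768 addend, the offset of r30 within .got2.  */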
21195 /* As above, for indirect calls. */
21197 static const char *
21198 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
21201 /* -Wformat-overflow workaround, without which gcc thinks that %u
21202 might produce 10 digits. Note that -Wformat-overflow will not
21203 currently warn here for str[], so do not rely on a warning to
21204 ensure str[] is correctly sized. */
21205 gcc_assert (funop <= MAX_RECOG_OPERANDS);
21207 /* Currently, funop is either 0 or 1. The maximum string is always
21208 a !speculate 64-bit __tls_get_addr call.
21211 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21212 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
21214 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21215 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
21222 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21223 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
21225 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21226 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
21233 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21234 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
21236 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21237 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
21244 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21245 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
21247 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21248 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
21252 static char str[160]; /* 8 spare */
21254 const char *ptrload = TARGET_64BIT ? "d" : "wz";
21256 if (DEFAULT_ABI == ABI_AIX)
21259 ptrload, funop + 2);
21261 /* We don't need the extra code to stop indirect call speculation if calling via LR. */
21263 bool speculate = (TARGET_MACHO
21264 || rs6000_speculate_indirect_jumps
21265 || (REG_P (operands[funop])
21266 && REGNO (operands[funop]) == LR_REGNO));
21268 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
21270 const char *rel64 = TARGET_64BIT ? "64" : "";
21273 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
21275 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
21276 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
21278 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
21279 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
21282 gcc_unreachable ();
21285 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
21286 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
21287 && flag_pic == 2 ? "+32768" : "");
21291 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
21292 tls, rel64, notoc, funop, addend);
21293 s += sprintf (s, "crset 2\n\t");
21296 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
21297 tls, rel64, notoc, funop, addend);
21299 else if (!speculate)
21300 s += sprintf (s, "crset 2\n\t");
21302 if (rs6000_pcrel_p (cfun))
21305 sprintf (s, "b%%T%ul", funop);
21307 sprintf (s, "beq%%T%ul-", funop);
21309 else if (DEFAULT_ABI == ABI_AIX)
21315 funop, ptrload, funop + 3);
21320 funop, ptrload, funop + 3);
21322 else if (DEFAULT_ABI == ABI_ELFv2)
21328 funop, ptrload, funop + 2);
21333 funop, ptrload, funop + 2);
21340 funop, sibcall ? "" : "l");
21344 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
21350 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
21352 return rs6000_indirect_call_template_1 (operands, funop, false);
21356 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
21358 return rs6000_indirect_call_template_1 (operands, funop, true);
21362 /* Output indirect call insns. WHICH identifies the type of sequence. */
21364 rs6000_pltseq_template (rtx *operands, int which)
21366 const char *rel64 = TARGET_64BIT ? "64" : "";
21369 if (TARGET_TLS_MARKERS && GET_CODE (operands[3]) == UNSPEC)
21371 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
21372 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
21373 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
21375 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
21376 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
21379 gcc_unreachable ();
21382 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
21383 static char str[96]; /* 10 spare */
21384 char off = WORDS_BIG_ENDIAN ? '2' : '4';
21385 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
21386 && flag_pic == 2 ? "+32768" : "");
21389 case RS6000_PLTSEQ_TOCSAVE:
21392 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
21393 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
21396 case RS6000_PLTSEQ_PLT16_HA:
21397 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
21400 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
21404 "addis %%0,%%1,0\n\t"
21405 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
21406 tls, off, rel64, addend);
21408 case RS6000_PLTSEQ_PLT16_LO:
21410 "l%s %%0,0(%%1)\n\t"
21411 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
21412 TARGET_64BIT ? "d" : "wz",
21413 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
21415 case RS6000_PLTSEQ_MTCTR:
21418 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
21419 tls, rel64, addend);
21421 case RS6000_PLTSEQ_PLT_PCREL34:
21423 "pl%s %%0,0(0),1\n\t"
21424 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
21425 TARGET_64BIT ? "d" : "wz",
21429 gcc_unreachable ();
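/* As an example, the PLT16_HA entry above expands to "addis %0,%1,0"
   (or a plain "lis" form for non-PIC SysV) followed by a ".reloc" marker
   such as ".reloc .-4,R_PPC64_PLT16_HA,%z2", which lets the linker
   rewrite the sequence once the final PLT layout is known.  */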
21435 /* Helper function to return whether a MODE can do prefixed loads/stores.
21436 VOIDmode is used when we are loading the pc-relative address into a base
21437 register, but we are not using it as part of a memory operation. As modes
21438 add support for prefixed memory, they will be added here. */
21441 mode_supports_prefixed_address_p (machine_mode mode)
21443 return mode == VOIDmode;
21446 /* Function to return true if ADDR is a valid prefixed memory address that uses mode MODE. */
21450 rs6000_prefixed_address (rtx addr, machine_mode mode)
21452 if (!TARGET_PREFIXED_ADDR || !mode_supports_prefixed_address_p (mode))
21455 /* Check for PC-relative addresses. */
21456 if (pcrel_address (addr, Pmode))
21459 /* Check for prefixed memory addresses that have a large numeric offset,
21460 or an offset that can't be used for a DS/DQ-form memory operation. */
21461 if (GET_CODE (addr) == PLUS)
21463 rtx op0 = XEXP (addr, 0);
21464 rtx op1 = XEXP (addr, 1);
21466 if (!base_reg_operand (op0, Pmode) || !CONST_INT_P (op1))
21469 HOST_WIDE_INT value = INTVAL (op1);
21470 if (!SIGNED_34BIT_OFFSET_P (value, 0))
21473 /* Offset larger than 16-bits? */
21474 if (!SIGNED_16BIT_OFFSET_P (value, 0))
21477 /* DQ instruction (bottom 4 bits must be 0) for vectors. */
21478 HOST_WIDE_INT mask;
21479 if (GET_MODE_SIZE (mode) >= 16)
21482 /* DS instruction (bottom 2 bits must be 0). For 32-bit integers, we
21483 need to use DS instructions if we are sign-extending the value with
21484 LWA. For 32-bit floating point, we need DS instructions to load and
21485 store values to the traditional Altivec registers. */
21486 else if (GET_MODE_SIZE (mode) >= 4)
21489 /* QImode/HImode has no restrictions. */
21493 /* Return true if we must use a prefixed instruction. */
21494 return (value & mask) != 0;
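/* Offset examples for the checks above on a 64-bit target: a vector
   access at offset 32 satisfies the DQ-form alignment (low four bits
   clear) and needs no prefix; offset 40 fails it and must use a prefixed
   instruction; and any offset outside the signed 16-bit range but within
   the signed 34-bit range is prefixed regardless of alignment.  */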
21500 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
21501 /* Emit an assembler directive to set symbol visibility for DECL to
21502 VISIBILITY_TYPE. */
21505 rs6000_assemble_visibility (tree decl, int vis)
21510 /* Functions need to have their entry point symbol visibility set as
21511 well as their descriptor symbol visibility. */
21512 if (DEFAULT_ABI == ABI_AIX
21514 && TREE_CODE (decl) == FUNCTION_DECL)
21516 static const char * const visibility_types[] = {
21517 NULL, "protected", "hidden", "internal"
21520 const char *name, *type;
21522 name = ((* targetm.strip_name_encoding)
21523 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
21524 type = visibility_types[vis];
21526 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
21527 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
21530 default_assemble_visibility (decl, vis);
21535 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
21537 /* Reversal of FP compares takes care -- an ordered compare
21538 becomes an unordered compare and vice versa. */
21539 if (mode == CCFPmode
21540 && (!flag_finite_math_only
21541 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
21542 || code == UNEQ || code == LTGT))
21543 return reverse_condition_maybe_unordered (code);
21545 return reverse_condition (code);
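/* For example, reversing GE in CCFPmode (without -ffinite-math-only)
   yields UNLT, so the reversed branch also fires on unordered operands,
   whereas reversing GE in CCmode gives the plain LT.  */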
21548 /* Generate a compare for CODE. Return a brand-new rtx that
21549 represents the result of the compare. */
21552 rs6000_generate_compare (rtx cmp, machine_mode mode)
21554 machine_mode comp_mode;
21555 rtx compare_result;
21556 enum rtx_code code = GET_CODE (cmp);
21557 rtx op0 = XEXP (cmp, 0);
21558 rtx op1 = XEXP (cmp, 1);
21560 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21561 comp_mode = CCmode;
21562 else if (FLOAT_MODE_P (mode))
21563 comp_mode = CCFPmode;
21564 else if (code == GTU || code == LTU
21565 || code == GEU || code == LEU)
21566 comp_mode = CCUNSmode;
21567 else if ((code == EQ || code == NE)
21568 && unsigned_reg_p (op0)
21569 && (unsigned_reg_p (op1)
21570 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
21571 /* These are unsigned values, perhaps there will be a later
21572 ordering compare that can be shared with this one. */
21573 comp_mode = CCUNSmode;
21575 comp_mode = CCmode;
21577 /* If we have an unsigned compare, make sure we don't have a signed value as an immediate. */
21579 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
21580 && INTVAL (op1) < 0)
21582 op0 = copy_rtx_if_shared (op0);
21583 op1 = force_reg (GET_MODE (op0), op1);
21584 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
21587 /* First, the compare. */
21588 compare_result = gen_reg_rtx (comp_mode);
21590 /* IEEE 128-bit support in VSX registers when we do not have hardware support. */
21592 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21594 rtx libfunc = NULL_RTX;
21595 bool check_nan = false;
21602 libfunc = optab_libfunc (eq_optab, mode);
21607 libfunc = optab_libfunc (ge_optab, mode);
21612 libfunc = optab_libfunc (le_optab, mode);
21617 libfunc = optab_libfunc (unord_optab, mode);
21618 code = (code == UNORDERED) ? NE : EQ;
21624 libfunc = optab_libfunc (ge_optab, mode);
21625 code = (code == UNGE) ? GE : GT;
21631 libfunc = optab_libfunc (le_optab, mode);
21632 code = (code == UNLE) ? LE : LT;
21638 libfunc = optab_libfunc (eq_optab, mode);
21639 code = (code == UNEQ) ? EQ : NE;
21643 gcc_unreachable ();
21646 gcc_assert (libfunc);
21649 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21650 SImode, op0, mode, op1, mode);
21652 /* The library signals an exception for signalling NaNs, so we need to
21653 handle isgreater, etc. by first checking isordered. */
21656 rtx ne_rtx, normal_dest, unord_dest;
21657 rtx unord_func = optab_libfunc (unord_optab, mode);
21658 rtx join_label = gen_label_rtx ();
21659 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
21660 rtx unord_cmp = gen_reg_rtx (comp_mode);
21663 /* Test for either value being a NaN. */
21664 gcc_assert (unord_func);
21665 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
21666 SImode, op0, mode, op1, mode);
21668 /* Set value (1) if either value is a NaN, and jump to the join label. */
21670 dest = gen_reg_rtx (SImode);
21671 emit_move_insn (dest, const1_rtx);
21672 emit_insn (gen_rtx_SET (unord_cmp,
21673 gen_rtx_COMPARE (comp_mode, unord_dest,
21676 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
21677 emit_jump_insn (gen_rtx_SET (pc_rtx,
21678 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
21682 /* Do the normal comparison, knowing that the values are not
21684 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21685 SImode, op0, mode, op1, mode);
21687 emit_insn (gen_cstoresi4 (dest,
21688 gen_rtx_fmt_ee (code, SImode, normal_dest,
21690 normal_dest, const0_rtx));
21692 /* Join NaN and non-NaN paths. Compare dest against 0. */
21693 emit_label (join_label);
21697 emit_insn (gen_rtx_SET (compare_result,
21698 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
21703 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
21704 CLOBBERs to match cmptf_internal2 pattern. */
21705 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
21706 && FLOAT128_IBM_P (GET_MODE (op0))
21707 && TARGET_HARD_FLOAT)
21708 emit_insn (gen_rtx_PARALLEL (VOIDmode,
21710 gen_rtx_SET (compare_result,
21711 gen_rtx_COMPARE (comp_mode, op0, op1)),
21712 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21713 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21714 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21715 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21716 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21717 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21718 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21719 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21720 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
21721 else if (GET_CODE (op1) == UNSPEC
21722 && XINT (op1, 1) == UNSPEC_SP_TEST)
21724 rtx op1b = XVECEXP (op1, 0, 0);
21725 comp_mode = CCEQmode;
21726 compare_result = gen_reg_rtx (CCEQmode);
21728 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
21730 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
21733 emit_insn (gen_rtx_SET (compare_result,
21734 gen_rtx_COMPARE (comp_mode, op0, op1)));
21737 /* Some kinds of FP comparisons need an OR operation;
21738 under flag_finite_math_only we don't bother. */
21739 if (FLOAT_MODE_P (mode)
21740 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
21741 && !flag_finite_math_only
21742 && (code == LE || code == GE
21743 || code == UNEQ || code == LTGT
21744 || code == UNGT || code == UNLT))
21746 enum rtx_code or1, or2;
21747 rtx or1_rtx, or2_rtx, compare2_rtx;
21748 rtx or_result = gen_reg_rtx (CCEQmode);
21752 case LE: or1 = LT; or2 = EQ; break;
21753 case GE: or1 = GT; or2 = EQ; break;
21754 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
21755 case LTGT: or1 = LT; or2 = GT; break;
21756 case UNGT: or1 = UNORDERED; or2 = GT; break;
21757 case UNLT: or1 = UNORDERED; or2 = LT; break;
21758 default: gcc_unreachable ();
21760 validate_condition_mode (or1, comp_mode);
21761 validate_condition_mode (or2, comp_mode);
21762 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
21763 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
21764 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
21765 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
21767 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
21769 compare_result = or_result;
21773 validate_condition_mode (code, GET_MODE (compare_result));
21775 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
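/* The CCEQ path above implements, e.g., LE as (LT | EQ): a cror combines
   the two source CR bits into the EQ position of a fresh CR field, and
   the condition finally returned tests that single bit.  */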
21779 /* Return the diagnostic message string if the binary operation OP is
21780 not permitted on TYPE1 and TYPE2, NULL otherwise. */
21783 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
21787 machine_mode mode1 = TYPE_MODE (type1);
21788 machine_mode mode2 = TYPE_MODE (type2);
21790 /* For complex modes, use the inner type. */
21791 if (COMPLEX_MODE_P (mode1))
21792 mode1 = GET_MODE_INNER (mode1);
21794 if (COMPLEX_MODE_P (mode2))
21795 mode2 = GET_MODE_INNER (mode2);
21797 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
21798 double to intermix unless -mfloat128-convert. */
21799 if (mode1 == mode2)
21802 if (!TARGET_FLOAT128_CVT)
21804 if ((mode1 == KFmode && mode2 == IFmode)
21805 || (mode1 == IFmode && mode2 == KFmode))
21806 return N_("__float128 and __ibm128 cannot be used in the same " "expression");
21809 if (TARGET_IEEEQUAD
21810 && ((mode1 == IFmode && mode2 == TFmode)
21811 || (mode1 == TFmode && mode2 == IFmode)))
21812 return N_("__ibm128 and long double cannot be used in the same " "expression");
21815 if (!TARGET_IEEEQUAD
21816 && ((mode1 == KFmode && mode2 == TFmode)
21817 || (mode1 == TFmode && mode2 == KFmode)))
21818 return N_("__float128 and long double cannot be used in the same " "expression");
21826 /* Expand floating point conversion to/from __float128 and __ibm128. */
21829 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
21831 machine_mode dest_mode = GET_MODE (dest);
21832 machine_mode src_mode = GET_MODE (src);
21833 convert_optab cvt = unknown_optab;
21834 bool do_move = false;
21835 rtx libfunc = NULL_RTX;
21837 typedef rtx (*rtx_2func_t) (rtx, rtx);
21838 rtx_2func_t hw_convert = (rtx_2func_t)0;
21842 rtx_2func_t from_df;
21843 rtx_2func_t from_sf;
21844 rtx_2func_t from_si_sign;
21845 rtx_2func_t from_si_uns;
21846 rtx_2func_t from_di_sign;
21847 rtx_2func_t from_di_uns;
21850 rtx_2func_t to_si_sign;
21851 rtx_2func_t to_si_uns;
21852 rtx_2func_t to_di_sign;
21853 rtx_2func_t to_di_uns;
21854 } hw_conversions[2] = {
21855 /* conversions to/from KFmode */
21857 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
21858 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
21859 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
21860 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
21861 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
21862 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
21863 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
21864 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
21865 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
21866 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
21867 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
21868 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
21871 /* conversions to/from TFmode */
21873 gen_extenddftf2_hw, /* TFmode <- DFmode. */
21874 gen_extendsftf2_hw, /* TFmode <- SFmode. */
21875 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
21876 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
21877 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
21878 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
21879 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
21880 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
21881 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
21882 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
21883 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
21884 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
21888 if (dest_mode == src_mode)
21889 gcc_unreachable ();
21891 /* Eliminate memory operations. */
21893 src = force_reg (src_mode, src);
21897 rtx tmp = gen_reg_rtx (dest_mode);
21898 rs6000_expand_float128_convert (tmp, src, unsigned_p);
21899 rs6000_emit_move (dest, tmp, dest_mode);
21903 /* Convert to IEEE 128-bit floating point. */
21904 if (FLOAT128_IEEE_P (dest_mode))
21906 if (dest_mode == KFmode)
21908 else if (dest_mode == TFmode)
21911 gcc_unreachable ();
21917 hw_convert = hw_conversions[kf_or_tf].from_df;
21922 hw_convert = hw_conversions[kf_or_tf].from_sf;
21928 if (FLOAT128_IBM_P (src_mode))
21937 cvt = ufloat_optab;
21938 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
21942 cvt = sfloat_optab;
21943 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
21950 cvt = ufloat_optab;
21951 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
21955 cvt = sfloat_optab;
21956 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
21961 gcc_unreachable ();
21965 /* Convert from IEEE 128-bit floating point. */
21966 else if (FLOAT128_IEEE_P (src_mode))
21968 if (src_mode == KFmode)
21970 else if (src_mode == TFmode)
21973 gcc_unreachable ();
21979 hw_convert = hw_conversions[kf_or_tf].to_df;
21984 hw_convert = hw_conversions[kf_or_tf].to_sf;
21990 if (FLOAT128_IBM_P (dest_mode))
22000 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
22005 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
22013 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
22018 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
22023 gcc_unreachable ();
22027 /* Both IBM format. */
22028 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
22032 gcc_unreachable ();
22034 /* Handle conversion between TFmode/KFmode/IFmode. */
22036 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
22038 /* Handle conversion if we have hardware support. */
22039 else if (TARGET_FLOAT128_HW && hw_convert)
22040 emit_insn ((hw_convert) (dest, src));
22042 /* Call an external function to do the conversion. */
22043 else if (cvt != unknown_optab)
22045 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
22046 gcc_assert (libfunc != NULL_RTX);
22048 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
22051 gcc_assert (dest2 != NULL_RTX);
22052 if (!rtx_equal_p (dest, dest2))
22053 emit_move_insn (dest, dest2);
22057 gcc_unreachable ();
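/* Illustrative flow, assuming a power9-class target: converting DFmode to
   KFmode with hardware support goes through gen_extenddfkf2_hw above and
   typically ends up as a single xscvdpqp instruction, while without
   TARGET_FLOAT128_HW the optab path emits a libgcc call (on Linux,
   something like __extenddfkf2).  A sketch of the expected lowering, not
   a guarantee for every configuration.  */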
22063 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
22064 can be used as that dest register. Return the dest register. */
22067 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
22069 if (op2 == const0_rtx)
22070 return op1;
22072 if (GET_CODE (scratch) == SCRATCH)
22073 scratch = gen_reg_rtx (mode);
22075 if (logical_operand (op2, mode))
22076 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
22078 emit_insn (gen_rtx_SET (scratch,
22079 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
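/* Illustrative example: for (a == b) with b a constant accepted by
   logical_operand, the code above emits an XOR (xori/xoris), otherwise
   an add of the negated OP2; either way SCRATCH becomes zero exactly
   when OP1 == OP2, and the caller then compares it against zero.  */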
22085 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
22088 machine_mode op_mode;
22089 enum rtx_code cond_code;
22090 rtx result = operands[0];
22092 condition_rtx = rs6000_generate_compare (operands[1], mode);
22093 cond_code = GET_CODE (condition_rtx);
22095 if (cond_code == NE
22096 || cond_code == GE || cond_code == LE
22097 || cond_code == GEU || cond_code == LEU
22098 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
22100 rtx not_result = gen_reg_rtx (CCEQmode);
22101 rtx not_op, rev_cond_rtx;
22102 machine_mode cc_mode;
22104 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
22106 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
22107 SImode, XEXP (condition_rtx, 0), const0_rtx);
22108 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
22109 emit_insn (gen_rtx_SET (not_result, not_op));
22110 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
22113 op_mode = GET_MODE (XEXP (operands[1], 0));
22114 if (op_mode == VOIDmode)
22115 op_mode = GET_MODE (XEXP (operands[1], 1));
22117 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
22119 PUT_MODE (condition_rtx, DImode);
22120 convert_move (result, condition_rtx, 0);
22124 PUT_MODE (condition_rtx, SImode);
22125 emit_insn (gen_rtx_SET (result, condition_rtx));
22129 /* Emit a branch of kind CODE to location LOC. */
22132 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
22134 rtx condition_rtx, loc_ref;
22136 condition_rtx = rs6000_generate_compare (operands[0], mode);
22137 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
22138 emit_jump_insn (gen_rtx_SET (pc_rtx,
22139 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
22140 loc_ref, pc_rtx)));
22143 /* Return the string to output a conditional branch to LABEL, which is
22144 the operand template of the label, or NULL if the branch is really a
22145 conditional return.
22147 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
22148 condition code register and its mode specifies what kind of
22149 comparison we made.
22151 REVERSED is nonzero if we should reverse the sense of the comparison.
22153 INSN is the insn. */
22156 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
22158 static char string[64];
22159 enum rtx_code code = GET_CODE (op);
22160 rtx cc_reg = XEXP (op, 0);
22161 machine_mode mode = GET_MODE (cc_reg);
22162 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
22163 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
22164 int really_reversed = reversed ^ need_longbranch;
22170 validate_condition_mode (code, mode);
22172 /* Work out which way this really branches. We could use
22173 reverse_condition_maybe_unordered here always but this
22174 makes the resulting assembler clearer. */
22175 if (really_reversed)
22177 /* Reversal of FP compares takes care -- an ordered compare
22178 becomes an unordered compare and vice versa. */
22179 if (mode == CCFPmode)
22180 code = reverse_condition_maybe_unordered (code);
22182 code = reverse_condition (code);
22187 /* Not all of these are actually distinct opcodes, but
22188 we distinguish them for clarity of the resulting assembler. */
22189 case NE: case LTGT:
22190 ccode = "ne"; break;
22191 case EQ: case UNEQ:
22192 ccode = "eq"; break;
22194 ccode = "ge"; break;
22195 case GT: case GTU: case UNGT:
22196 ccode = "gt"; break;
22198 ccode = "le"; break;
22199 case LT: case LTU: case UNLT:
22200 ccode = "lt"; break;
22201 case UNORDERED: ccode = "un"; break;
22202 case ORDERED: ccode = "nu"; break;
22203 case UNGE: ccode = "nl"; break;
22204 case UNLE: ccode = "ng"; break;
22206 gcc_unreachable ();
22209 /* Maybe we have a guess as to how likely the branch is. */
22211 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
22212 if (note != NULL_RTX)
22214 /* PROB is the difference from 50%. */
22215 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
22216 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
22218 /* Only hint for highly probable/improbable branches on newer cpus when
22219 we have real profile data, as static prediction overrides processor
22220 dynamic prediction. For older cpus we may as well always hint, but
22221 assume not taken for branches that are very close to 50% as a
22222 mispredicted taken branch is more expensive than a
22223 mispredicted not-taken branch. */
22224 if (rs6000_always_hint
22225 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
22226 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
22227 && br_prob_note_reliable_p (note)))
22229 if (abs (prob) > REG_BR_PROB_BASE / 20
22230 && ((prob > 0) ^ need_longbranch))
22238 s += sprintf (s, "b%slr%s ", ccode, pred);
22240 s += sprintf (s, "b%s%s ", ccode, pred);
22242 /* We need to escape any '%' characters in the reg_names string.
22243 Assume they'd only be the first character.... */
22244 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
22246 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
22250 /* If the branch distance was too far, we may have to use an
22251 unconditional branch to go the distance. */
22252 if (need_longbranch)
22253 s += sprintf (s, ",$+8\n\tb %s", label);
22255 s += sprintf (s, ",%s", label);
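/* Example of the strings built here (illustrative): a likely-taken
   branch on cr0 comes out as "bne+ 0,.L5" with the default register
   names ("%cr0" under -mregnames, hence the escaping above); when the
   target is out of range the sense is reversed, giving "beq 0,$+8"
   followed by "b .L5".  */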
22261 /* Return insn for VSX or Altivec comparisons. */
22264 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
22267 machine_mode mode = GET_MODE (op0);
22275 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22286 mask = gen_reg_rtx (mode);
22287 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
22294 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
22295 DMODE is expected destination mode. This is a recursive function. */
22298 rs6000_emit_vector_compare (enum rtx_code rcode,
22300 machine_mode dmode)
22303 bool swap_operands = false;
22304 bool try_again = false;
22306 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22307 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22309 /* See if the comparison works as is. */
22310 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22318 swap_operands = true;
22323 swap_operands = true;
22331 /* Invert condition and try again.
22332 e.g., A != B becomes ~(A==B). */
22334 enum rtx_code rev_code;
22335 enum insn_code nor_code;
22338 rev_code = reverse_condition_maybe_unordered (rcode);
22339 if (rev_code == UNKNOWN)
22342 nor_code = optab_handler (one_cmpl_optab, dmode);
22343 if (nor_code == CODE_FOR_nothing)
22346 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22350 mask = gen_reg_rtx (dmode);
22351 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22359 /* Try GT/GTU/LT/LTU OR EQ */
22362 enum insn_code ior_code;
22363 enum rtx_code new_code;
22384 gcc_unreachable ();
22387 ior_code = optab_handler (ior_optab, dmode);
22388 if (ior_code == CODE_FOR_nothing)
22391 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
22395 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
22399 mask = gen_reg_rtx (dmode);
22400 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
22411 std::swap (op0, op1);
22413 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22418 /* You only get two chances. */
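/* Illustrative lowering: a V4SI "a >= b" has no single AltiVec compare,
   so the code above emits vcmpgtsw and vcmpequw and combines the masks
   with vor; likewise "a != b" becomes the one's complement of the
   vcmpequw mask.  A sketch of the usual outcome, not a promise for
   every mode.  */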
22422 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22423 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22424 operands for the relation operation COND. */
22427 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
22428 rtx cond, rtx cc_op0, rtx cc_op1)
22430 machine_mode dest_mode = GET_MODE (dest);
22431 machine_mode mask_mode = GET_MODE (cc_op0);
22432 enum rtx_code rcode = GET_CODE (cond);
22433 machine_mode cc_mode = CCmode;
22436 bool invert_move = false;
22438 if (VECTOR_UNIT_NONE_P (dest_mode))
22441 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
22442 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
22446 /* Swap operands if we can, and fall back to doing the operation as
22447 specified, and doing a NOR to invert the test. */
22453 /* Invert condition and try again.
22454 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22455 invert_move = true;
22456 rcode = reverse_condition_maybe_unordered (rcode);
22457 if (rcode == UNKNOWN)
22463 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
22465 /* Invert condition to avoid compound test. */
22466 invert_move = true;
22467 rcode = reverse_condition (rcode);
22475 /* Mark unsigned tests with CCUNSmode. */
22476 cc_mode = CCUNSmode;
22478 /* Invert condition to avoid compound test if necessary. */
22479 if (rcode == GEU || rcode == LEU)
22481 invert_move = true;
22482 rcode = reverse_condition (rcode);
22490 /* Get the vector mask for the given relational operations. */
22491 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
22497 std::swap (op_true, op_false);
22499 /* Optimize vec1 == vec2, exploiting that the compare mask is -1/0 per element. */
22500 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
22501 && (GET_CODE (op_true) == CONST_VECTOR
22502 || GET_CODE (op_false) == CONST_VECTOR))
22504 rtx constant_0 = CONST0_RTX (dest_mode);
22505 rtx constant_m1 = CONSTM1_RTX (dest_mode);
22507 if (op_true == constant_m1 && op_false == constant_0)
22509 emit_move_insn (dest, mask);
22513 else if (op_true == constant_0 && op_false == constant_m1)
22515 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
22519 /* If we can't use the vector comparison directly, perhaps we can use
22520 the mask for the true or false fields, instead of loading up a constant. */
22522 if (op_true == constant_m1)
22525 if (op_false == constant_0)
22529 if (!REG_P (op_true) && !SUBREG_P (op_true))
22530 op_true = force_reg (dest_mode, op_true);
22532 if (!REG_P (op_false) && !SUBREG_P (op_false))
22533 op_false = force_reg (dest_mode, op_false);
22535 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
22536 CONST0_RTX (dest_mode));
22537 emit_insn (gen_rtx_SET (dest,
22538 gen_rtx_IF_THEN_ELSE (dest_mode,
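/* The IF_THEN_ELSE above is matched by the vector select patterns
   (vsel/xxsel), which compute dest = (mask & op_true) | (~mask & op_false)
   bitwise; since the compare mask is -1/0 per element, whole elements are
   selected.  (Summary for illustration only.)  */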
22545 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
22546 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
22547 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
22548 hardware has no such operation. */
22551 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22553 enum rtx_code code = GET_CODE (op);
22554 rtx op0 = XEXP (op, 0);
22555 rtx op1 = XEXP (op, 1);
22556 machine_mode compare_mode = GET_MODE (op0);
22557 machine_mode result_mode = GET_MODE (dest);
22558 bool max_p = false;
22560 if (result_mode != compare_mode)
22563 if (code == GE || code == GT)
22565 else if (code == LE || code == LT)
22570 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
22573 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
22579 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
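/* Illustrative example: for DFmode "a = (a >= b) ? a : b" this emits a
   single xsmaxcdp, the ISA 3.0 "compare and maximum, C semantics"
   instruction, instead of a compare-and-branch or fsel sequence.  */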
22583 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
22584 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
22585 operands of the last comparison is nonzero/true, FALSE_COND if it is
22586 zero/false. Return 0 if the hardware has no such operation. */
22589 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22591 enum rtx_code code = GET_CODE (op);
22592 rtx op0 = XEXP (op, 0);
22593 rtx op1 = XEXP (op, 1);
22594 machine_mode result_mode = GET_MODE (dest);
22599 if (!can_create_pseudo_p ())
22612 code = swap_condition (code);
22613 std::swap (op0, op1);
22620 /* Generate: [(parallel [(set (dest)
22621 (if_then_else (op (cmp1) (cmp2))
22624 (clobber (scratch))])]. */
22626 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
22627 cmove_rtx = gen_rtx_SET (dest,
22628 gen_rtx_IF_THEN_ELSE (result_mode,
22633 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
22634 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22635 gen_rtvec (2, cmove_rtx, clobber_rtx)));
22640 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
22641 operands of the last comparison is nonzero/true, FALSE_COND if it
22642 is zero/false. Return 0 if the hardware has no such operation. */
22645 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22647 enum rtx_code code = GET_CODE (op);
22648 rtx op0 = XEXP (op, 0);
22649 rtx op1 = XEXP (op, 1);
22650 machine_mode compare_mode = GET_MODE (op0);
22651 machine_mode result_mode = GET_MODE (dest);
22653 bool is_against_zero;
22655 /* These modes should always match. */
22656 if (GET_MODE (op1) != compare_mode
22657 /* In the isel case however, we can use a compare immediate, so
22658 op1 may be a small constant. */
22659 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
22661 if (GET_MODE (true_cond) != result_mode)
22663 if (GET_MODE (false_cond) != result_mode)
22666 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
22667 if (TARGET_P9_MINMAX
22668 && (compare_mode == SFmode || compare_mode == DFmode)
22669 && (result_mode == SFmode || result_mode == DFmode))
22671 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
22674 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
22678 /* Don't allow using floating point comparisons for integer results for now. */
22680 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
22683 /* First, work out if the hardware can do this at all, or
22684 if it's too slow.... */
22685 if (!FLOAT_MODE_P (compare_mode))
22688 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
22692 is_against_zero = op1 == CONST0_RTX (compare_mode);
22694 /* A floating-point subtract might overflow, underflow, or produce
22695 an inexact result, thus changing the floating-point flags, so it
22696 can't be generated if we care about that. It's safe if one side
22697 of the construct is zero, since then no subtract will be generated. */
22699 if (SCALAR_FLOAT_MODE_P (compare_mode)
22700 && flag_trapping_math && ! is_against_zero)
22703 /* Eliminate half of the comparisons by switching operands, this
22704 makes the remaining code simpler. */
22705 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
22706 || code == LTGT || code == LT || code == UNLE)
22708 code = reverse_condition_maybe_unordered (code);
22710 true_cond = false_cond;
22714 /* UNEQ and LTGT take four instructions for a comparison with zero,
22715 it'll probably be faster to use a branch here too. */
22716 if (code == UNEQ && HONOR_NANS (compare_mode))
22719 /* We're going to try to implement comparisons by performing
22720 a subtract, then comparing against zero. Unfortunately,
22721 Inf - Inf is NaN which is not zero, and so if we don't
22722 know that the operand is finite and the comparison
22723 would treat EQ differently from UNORDERED, we can't do it. */
22724 if (HONOR_INFINITIES (compare_mode)
22725 && code != GT && code != UNGE
22726 && (!CONST_DOUBLE_P (op1)
22727 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
22728 /* Constructs of the form (a OP b ? a : b) are safe. */
22729 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
22730 || (! rtx_equal_p (op0, true_cond)
22731 && ! rtx_equal_p (op1, true_cond))))
22734 /* At this point we know we can use fsel. */
22736 /* Reduce the comparison to a comparison against zero. */
22737 if (! is_against_zero)
22739 temp = gen_reg_rtx (compare_mode);
22740 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
22742 op1 = CONST0_RTX (compare_mode);
22745 /* If we don't care about NaNs we can reduce some of the comparisons
22746 down to faster ones. */
22747 if (! HONOR_NANS (compare_mode))
22753 true_cond = false_cond;
22766 /* Now, reduce everything down to a GE. */
22773 temp = gen_reg_rtx (compare_mode);
22774 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22779 temp = gen_reg_rtx (compare_mode);
22780 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
22785 temp = gen_reg_rtx (compare_mode);
22786 emit_insn (gen_rtx_SET (temp,
22787 gen_rtx_NEG (compare_mode,
22788 gen_rtx_ABS (compare_mode, op0))));
22793 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
22794 temp = gen_reg_rtx (result_mode);
22795 emit_insn (gen_rtx_SET (temp,
22796 gen_rtx_IF_THEN_ELSE (result_mode,
22797 gen_rtx_GE (VOIDmode,
22799 true_cond, false_cond)));
22800 false_cond = true_cond;
22803 temp = gen_reg_rtx (compare_mode);
22804 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22809 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
22810 temp = gen_reg_rtx (result_mode);
22811 emit_insn (gen_rtx_SET (temp,
22812 gen_rtx_IF_THEN_ELSE (result_mode,
22813 gen_rtx_GE (VOIDmode,
22815 true_cond, false_cond)));
22816 true_cond = false_cond;
22819 temp = gen_reg_rtx (compare_mode);
22820 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22825 gcc_unreachable ();
22828 emit_insn (gen_rtx_SET (dest,
22829 gen_rtx_IF_THEN_ELSE (result_mode,
22830 gen_rtx_GE (VOIDmode,
22832 true_cond, false_cond)));
22836 /* Same as above, but for ints (isel). */
22839 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22841 rtx condition_rtx, cr;
22842 machine_mode mode = GET_MODE (dest);
22843 enum rtx_code cond_code;
22844 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
22847 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
22850 /* We still have to do the compare, because isel doesn't do a
22851 compare, it just looks at the CRx bits set by a previous compare instruction. */
22853 condition_rtx = rs6000_generate_compare (op, mode);
22854 cond_code = GET_CODE (condition_rtx);
22855 cr = XEXP (condition_rtx, 0);
22856 signedp = GET_MODE (cr) == CCmode;
22858 isel_func = (mode == SImode
22859 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
22860 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
22864 case LT: case GT: case LTU: case GTU: case EQ:
22865 /* isel handles these directly. */
22869 /* We need to swap the sense of the comparison. */
22871 std::swap (false_cond, true_cond);
22872 PUT_CODE (condition_rtx, reverse_condition (cond_code));
22877 false_cond = force_reg (mode, false_cond);
22878 if (true_cond != const0_rtx)
22879 true_cond = force_reg (mode, true_cond);
22881 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
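/* Illustrative example: after the cmpw/cmpd emitted above, the final isel
   has the form "isel dest,true,false,bit", selecting TRUE_COND when the
   tested CR bit is set and FALSE_COND otherwise; a const0_rtx operand can
   stay r0, which isel reads as the value 0.  */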
22887 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22889 machine_mode mode = GET_MODE (op0);
22893 /* VSX/altivec have direct min/max insns. */
22894 if ((code == SMAX || code == SMIN)
22895 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
22896 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
22898 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
22902 if (code == SMAX || code == SMIN)
22907 if (code == SMAX || code == UMAX)
22908 target = emit_conditional_move (dest, c, op0, op1, mode,
22909 op0, op1, mode, 0);
22911 target = emit_conditional_move (dest, c, op0, op1, mode,
22912 op1, op0, mode, 0);
22913 gcc_assert (target);
22914 if (target != dest)
22915 emit_move_insn (dest, target);
22918 /* A subroutine of the atomic operation splitters. Jump to LABEL if
22919 COND is true. Mark the jump as unlikely to be taken. */
22922 emit_unlikely_jump (rtx cond, rtx label)
22924 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
22925 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
22926 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
22929 /* A subroutine of the atomic operation splitters. Emit a load-locked
22930 instruction in MODE. For QI/HImode, possibly use a pattern that includes
22931 the zero_extend operation. */
22934 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
22936 rtx (*fn) (rtx, rtx) = NULL;
22941 fn = gen_load_lockedqi;
22944 fn = gen_load_lockedhi;
22947 if (GET_MODE (mem) == QImode)
22948 fn = gen_load_lockedqi_si;
22949 else if (GET_MODE (mem) == HImode)
22950 fn = gen_load_lockedhi_si;
22952 fn = gen_load_lockedsi;
22955 fn = gen_load_lockeddi;
22958 fn = gen_load_lockedti;
22961 gcc_unreachable ();
22963 emit_insn (fn (reg, mem));
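/* For reference (illustrative mapping): the patterns used above expand to
   the PowerPC load-and-reserve instructions lbarx/lharx/lwarx/ldarx/lqarx,
   and emit_store_conditional below to the matching
   stbcx./sthcx./stwcx./stdcx./stqcx. forms defined in sync.md.  */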
22966 /* A subroutine of the atomic operation splitters. Emit a store-conditional
22967 instruction in MODE. */
22970 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
22972 rtx (*fn) (rtx, rtx, rtx) = NULL;
22977 fn = gen_store_conditionalqi;
22980 fn = gen_store_conditionalhi;
22983 fn = gen_store_conditionalsi;
22986 fn = gen_store_conditionaldi;
22989 fn = gen_store_conditionalti;
22992 gcc_unreachable ();
22995 /* Emit sync before stwcx. to address PPC405 Erratum. */
22996 if (PPC405_ERRATUM77)
22997 emit_insn (gen_hwsync ());
22999 emit_insn (fn (res, mem, val));
23002 /* Expand barriers before and after a load_locked/store_cond sequence. */
23005 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
23007 rtx addr = XEXP (mem, 0);
23009 if (!legitimate_indirect_address_p (addr, reload_completed)
23010 && !legitimate_indexed_address_p (addr, reload_completed))
23012 addr = force_reg (Pmode, addr);
23013 mem = replace_equiv_address_nv (mem, addr);
23018 case MEMMODEL_RELAXED:
23019 case MEMMODEL_CONSUME:
23020 case MEMMODEL_ACQUIRE:
23022 case MEMMODEL_RELEASE:
23023 case MEMMODEL_ACQ_REL:
23024 emit_insn (gen_lwsync ());
23026 case MEMMODEL_SEQ_CST:
23027 emit_insn (gen_hwsync ());
23030 gcc_unreachable ();
23036 rs6000_post_atomic_barrier (enum memmodel model)
23040 case MEMMODEL_RELAXED:
23041 case MEMMODEL_CONSUME:
23042 case MEMMODEL_RELEASE:
23044 case MEMMODEL_ACQUIRE:
23045 case MEMMODEL_ACQ_REL:
23046 case MEMMODEL_SEQ_CST:
23047 emit_insn (gen_isync ());
23050 gcc_unreachable ();
23054 /* A subroutine of the various atomic expanders. For sub-word operations,
23055 we must adjust things to operate on SImode. Given the original MEM,
23056 return a new aligned memory. Also build and return the quantities by
23057 which to shift and mask. */
23060 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
23062 rtx addr, align, shift, mask, mem;
23063 HOST_WIDE_INT shift_mask;
23064 machine_mode mode = GET_MODE (orig_mem);
23066 /* For smaller modes, we have to implement this via SImode. */
23067 shift_mask = (mode == QImode ? 0x18 : 0x10);
23069 addr = XEXP (orig_mem, 0);
23070 addr = force_reg (GET_MODE (addr), addr);
23072 /* Aligned memory containing subword. Generate a new memory. We
23073 do not want any of the existing MEM_ATTR data, as we're now
23074 accessing memory outside the original object. */
23075 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
23076 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23077 mem = gen_rtx_MEM (SImode, align);
23078 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
23079 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
23080 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
23082 /* Shift amount for subword relative to aligned word. */
23083 shift = gen_reg_rtx (SImode);
23084 addr = gen_lowpart (SImode, addr);
23085 rtx tmp = gen_reg_rtx (SImode);
23086 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
23087 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
23088 if (BYTES_BIG_ENDIAN)
23089 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
23090 shift, 1, OPTAB_LIB_WIDEN);
23093 /* Mask for insertion. */
23094 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
23095 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
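/* Worked example (illustrative): a HImode access at an address A with
   (A & 3) == 2 gives align = A & -4; on little-endian shift = 16 and
   mask = 0xffff0000 (the halfword lives in the high bits of the SImode
   word), while on big-endian the XOR above flips shift to 0, giving
   mask = 0x0000ffff.  */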
23101 /* A subroutine of the various atomic expanders. For sub-word operands,
23102 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
23105 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
23109 x = gen_reg_rtx (SImode);
23110 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
23111 gen_rtx_NOT (SImode, mask),
23114 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
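/* In effect x = (OLDVAL & ~MASK) | NEWVAL, assuming the caller has
   already shifted NEWVAL into position within the word.  */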
23119 /* A subroutine of the various atomic expanders. For sub-word operands,
23120 extract WIDE to NARROW via SHIFT. */
23123 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
23125 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
23126 wide, 1, OPTAB_LIB_WIDEN);
23127 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
23130 /* Expand an atomic compare and swap operation. */
23133 rs6000_expand_atomic_compare_and_swap (rtx operands[])
23135 rtx boolval, retval, mem, oldval, newval, cond;
23136 rtx label1, label2, x, mask, shift;
23137 machine_mode mode, orig_mode;
23138 enum memmodel mod_s, mod_f;
23141 boolval = operands[0];
23142 retval = operands[1];
23144 oldval = operands[3];
23145 newval = operands[4];
23146 is_weak = (INTVAL (operands[5]) != 0);
23147 mod_s = memmodel_base (INTVAL (operands[6]));
23148 mod_f = memmodel_base (INTVAL (operands[7]));
23149 orig_mode = mode = GET_MODE (mem);
23151 mask = shift = NULL_RTX;
23152 if (mode == QImode || mode == HImode)
23154 /* Before power8, we didn't have access to lbarx/lharx, so generate a
23155 lwarx and shift/mask operations. With power8, we need to do the
23156 comparison in SImode, but the store is still done in QI/HImode. */
23157 oldval = convert_modes (SImode, mode, oldval, 1);
23159 if (!TARGET_SYNC_HI_QI)
23161 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23163 /* Shift and mask OLDVAL into position within the word. */
23164 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
23165 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23167 /* Shift and mask NEWVAL into position within the word. */
23168 newval = convert_modes (SImode, mode, newval, 1);
23169 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
23170 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23173 /* Prepare to adjust the return value. */
23174 retval = gen_reg_rtx (SImode);
23177 else if (reg_overlap_mentioned_p (retval, oldval))
23178 oldval = copy_to_reg (oldval);
23180 if (mode != TImode && !reg_or_short_operand (oldval, mode))
23181 oldval = copy_to_mode_reg (mode, oldval);
23183 if (reg_overlap_mentioned_p (retval, newval))
23184 newval = copy_to_reg (newval);
23186 mem = rs6000_pre_atomic_barrier (mem, mod_s);
23191 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23192 emit_label (XEXP (label1, 0));
23194 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23196 emit_load_locked (mode, retval, mem);
23200 x = expand_simple_binop (SImode, AND, retval, mask,
23201 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23203 cond = gen_reg_rtx (CCmode);
23204 /* If we have TImode, synthesize a comparison. */
23205 if (mode != TImode)
23206 x = gen_rtx_COMPARE (CCmode, x, oldval);
23209 rtx xor1_result = gen_reg_rtx (DImode);
23210 rtx xor2_result = gen_reg_rtx (DImode);
23211 rtx or_result = gen_reg_rtx (DImode);
23212 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
23213 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
23214 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
23215 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
23217 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
23218 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
23219 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
23220 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
23223 emit_insn (gen_rtx_SET (cond, x));
23225 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23226 emit_unlikely_jump (x, label2);
23230 x = rs6000_mask_atomic_subword (retval, newval, mask);
23232 emit_store_conditional (orig_mode, cond, mem, x);
23236 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23237 emit_unlikely_jump (x, label1);
23240 if (!is_mm_relaxed (mod_f))
23241 emit_label (XEXP (label2, 0));
23243 rs6000_post_atomic_barrier (mod_s);
23245 if (is_mm_relaxed (mod_f))
23246 emit_label (XEXP (label2, 0));
23249 rs6000_finish_atomic_subword (operands[1], retval, shift);
23250 else if (mode != GET_MODE (operands[1]))
23251 convert_move (operands[1], retval, 1);
23253 /* In all cases, CR0 contains EQ on success, and NE on failure. */
23254 x = gen_rtx_EQ (SImode, cond, const0_rtx);
23255 emit_insn (gen_rtx_SET (boolval, x));
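/* Shape of the emitted sequence for a word-sized strong compare-and-swap
   (illustrative assembly sketch, barriers omitted):

	.L1:	lwarx   ret,0,mem
		cmpw    cr0,ret,oldval
		bne-    cr0,.L2
		stwcx.  newval,0,mem
		bne-    cr0,.L1
	.L2:

   with CR0 left holding EQ on success, as noted above.  */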
23258 /* Expand an atomic exchange operation. */
23261 rs6000_expand_atomic_exchange (rtx operands[])
23263 rtx retval, mem, val, cond;
23265 enum memmodel model;
23266 rtx label, x, mask, shift;
23268 retval = operands[0];
23271 model = memmodel_base (INTVAL (operands[3]));
23272 mode = GET_MODE (mem);
23274 mask = shift = NULL_RTX;
23275 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
23277 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23279 /* Shift and mask VAL into position within the word. */
23280 val = convert_modes (SImode, mode, val, 1);
23281 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23282 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23284 /* Prepare to adjust the return value. */
23285 retval = gen_reg_rtx (SImode);
23289 mem = rs6000_pre_atomic_barrier (mem, model);
23291 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23292 emit_label (XEXP (label, 0));
23294 emit_load_locked (mode, retval, mem);
23298 x = rs6000_mask_atomic_subword (retval, val, mask);
23300 cond = gen_reg_rtx (CCmode);
23301 emit_store_conditional (mode, cond, mem, x);
23303 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23304 emit_unlikely_jump (x, label);
23306 rs6000_post_atomic_barrier (model);
23309 rs6000_finish_atomic_subword (operands[0], retval, shift);
23312 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23313 to perform. MEM is the memory on which to operate. VAL is the second
23314 operand of the binary operator. BEFORE and AFTER are optional locations to
23315 return the value of MEM either before or after the operation. MODEL_RTX
23316 is a CONST_INT containing the memory model to use. */
23319 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23320 rtx orig_before, rtx orig_after, rtx model_rtx)
23322 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23323 machine_mode mode = GET_MODE (mem);
23324 machine_mode store_mode = mode;
23325 rtx label, x, cond, mask, shift;
23326 rtx before = orig_before, after = orig_after;
23328 mask = shift = NULL_RTX;
23329 /* On power8, we want to use SImode for the operation. On previous systems,
23330 use the operation in a subword and shift/mask to get the proper byte or halfword. */
23332 if (mode == QImode || mode == HImode)
23334 if (TARGET_SYNC_HI_QI)
23336 val = convert_modes (SImode, mode, val, 1);
23338 /* Prepare to adjust the return value. */
23339 before = gen_reg_rtx (SImode);
23341 after = gen_reg_rtx (SImode);
23346 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23348 /* Shift and mask VAL into position within the word. */
23349 val = convert_modes (SImode, mode, val, 1);
23350 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23351 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23357 /* We've already zero-extended VAL. That is sufficient to
23358 make certain that it does not affect other bits. */
23363 /* If we make certain that all of the other bits in VAL are
23364 set, that will be sufficient to not affect other bits. */
23365 x = gen_rtx_NOT (SImode, mask);
23366 x = gen_rtx_IOR (SImode, x, val);
23367 emit_insn (gen_rtx_SET (val, x));
23374 /* These will all affect bits outside the field and need
23375 adjustment via MASK within the loop. */
23379 gcc_unreachable ();
23382 /* Prepare to adjust the return value. */
23383 before = gen_reg_rtx (SImode);
23385 after = gen_reg_rtx (SImode);
23386 store_mode = mode = SImode;
23390 mem = rs6000_pre_atomic_barrier (mem, model);
23392 label = gen_label_rtx ();
23393 emit_label (label);
23394 label = gen_rtx_LABEL_REF (VOIDmode, label);
23396 if (before == NULL_RTX)
23397 before = gen_reg_rtx (mode);
23399 emit_load_locked (mode, before, mem);
23403 x = expand_simple_binop (mode, AND, before, val,
23404 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23405 after = expand_simple_unop (mode, NOT, x, after, 1);
23409 after = expand_simple_binop (mode, code, before, val,
23410 after, 1, OPTAB_LIB_WIDEN);
23416 x = expand_simple_binop (SImode, AND, after, mask,
23417 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23418 x = rs6000_mask_atomic_subword (before, x, mask);
23420 else if (store_mode != mode)
23421 x = convert_modes (store_mode, mode, x, 1);
23423 cond = gen_reg_rtx (CCmode);
23424 emit_store_conditional (store_mode, cond, mem, x);
23426 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23427 emit_unlikely_jump (x, label);
23429 rs6000_post_atomic_barrier (model);
23433 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
23434 then do the calculations in a SImode register. */
23436 rs6000_finish_atomic_subword (orig_before, before, shift);
23438 rs6000_finish_atomic_subword (orig_after, after, shift);
23440 else if (store_mode != mode)
23442 /* QImode/HImode on machines with lbarx/lharx where we do the native
23443 operation and then do the calculations in a SImode register. */
23445 convert_move (orig_before, before, 1);
23447 convert_move (orig_after, after, 1);
23449 else if (orig_after && after != orig_after)
23450 emit_move_insn (orig_after, after);
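/* Shape of the emitted loop for, e.g., a word-sized fetch-and-add
   (illustrative sketch, barriers omitted):

	.L1:	lwarx   before,0,mem
		add     after,before,val
		stwcx.  after,0,mem
		bne-    cr0,.L1  */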
23453 /* Emit instructions to move SRC to DST. Called by splitters for
23454 multi-register moves. It will emit at most one instruction for
23455 each register that is accessed; that is, it won't emit li/lis pairs
23456 (or equivalent for 64-bit code). One of SRC or DST must be a hard register. */
23460 rs6000_split_multireg_move (rtx dst, rtx src)
23462 /* The register number of the first register being moved. */
23464 /* The mode that is to be moved. */
23466 /* The mode that the move is being done in, and its size. */
23467 machine_mode reg_mode;
23469 /* The number of registers that will be moved. */
23472 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
23473 mode = GET_MODE (dst);
23474 nregs = hard_regno_nregs (reg, mode);
23475 if (FP_REGNO_P (reg))
23476 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
23477 (TARGET_HARD_FLOAT ? DFmode : SFmode);
23478 else if (ALTIVEC_REGNO_P (reg))
23479 reg_mode = V16QImode;
23481 reg_mode = word_mode;
23482 reg_mode_size = GET_MODE_SIZE (reg_mode);
23484 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
23486 /* TDmode residing in FP registers is special, since the ISA requires that
23487 the lower-numbered word of a register pair is always the most significant
23488 word, even in little-endian mode. This does not match the usual subreg
23489 semantics, so we cannot use simplify_gen_subreg in those cases. Access
23490 the appropriate constituent registers "by hand" in little-endian mode.
23492 Note we do not need to check for destructive overlap here since TDmode
23493 can only reside in even/odd register pairs. */
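/* Illustrative example: for a TDmode value in the pair {f10,f11}, f10
   always holds the most-significant doubleword, so on little-endian the
   subreg at byte offset 0 (the least-significant doubleword) must be
   paired with f11 -- hence the REGNO + nregs - 1 - i indexing below.  */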
23494 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
23499 for (i = 0; i < nregs; i++)
23501 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
23502 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
23504 p_src = simplify_gen_subreg (reg_mode, src, mode,
23505 i * reg_mode_size);
23507 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
23508 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
23510 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
23511 i * reg_mode_size);
23513 emit_insn (gen_rtx_SET (p_dst, p_src));
23519 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
23521 /* Move register range backwards, if we might have destructive overlap. */
23524 for (i = nregs - 1; i >= 0; i--)
23525 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23526 i * reg_mode_size),
23527 simplify_gen_subreg (reg_mode, src, mode,
23528 i * reg_mode_size)));
23534 bool used_update = false;
23535 rtx restore_basereg = NULL_RTX;
23537 if (MEM_P (src) && INT_REGNO_P (reg))
23541 if (GET_CODE (XEXP (src, 0)) == PRE_INC
23542 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
23545 breg = XEXP (XEXP (src, 0), 0);
23546 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
23547 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
23548 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
23549 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23550 src = replace_equiv_address (src, breg);
23552 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
23554 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
23556 rtx basereg = XEXP (XEXP (src, 0), 0);
23559 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
23560 emit_insn (gen_rtx_SET (ndst,
23561 gen_rtx_MEM (reg_mode,
23563 used_update = true;
23566 emit_insn (gen_rtx_SET (basereg,
23567 XEXP (XEXP (src, 0), 1)));
23568 src = replace_equiv_address (src, basereg);
23572 rtx basereg = gen_rtx_REG (Pmode, reg);
23573 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
23574 src = replace_equiv_address (src, basereg);
23578 breg = XEXP (src, 0);
23579 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
23580 breg = XEXP (breg, 0);
23582 /* If the base register we are using to address memory is
23583 also a destination reg, then change that register last. */
23585 && REGNO (breg) >= REGNO (dst)
23586 && REGNO (breg) < REGNO (dst) + nregs)
23587 j = REGNO (breg) - REGNO (dst);
23589 else if (MEM_P (dst) && INT_REGNO_P (reg))
23593 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
23594 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
23597 breg = XEXP (XEXP (dst, 0), 0);
23598 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
23599 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
23600 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
23602 /* We have to update the breg before doing the store.
23603 Use store with update, if available. */
23607 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23608 emit_insn (TARGET_32BIT
23609 ? (TARGET_POWERPC64
23610 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
23611 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
23612 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
23613 used_update = true;
23616 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23617 dst = replace_equiv_address (dst, breg);
23619 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
23620 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
23622 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
23624 rtx basereg = XEXP (XEXP (dst, 0), 0);
23627 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23628 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
23631 used_update = true;
23634 emit_insn (gen_rtx_SET (basereg,
23635 XEXP (XEXP (dst, 0), 1)));
23636 dst = replace_equiv_address (dst, basereg);
23640 rtx basereg = XEXP (XEXP (dst, 0), 0);
23641 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
23642 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
23644 && REG_P (offsetreg)
23645 && REGNO (basereg) != REGNO (offsetreg));
23646 if (REGNO (basereg) == 0)
23648 rtx tmp = offsetreg;
23649 offsetreg = basereg;
23652 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
23653 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
23654 dst = replace_equiv_address (dst, basereg);
23657 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
23658 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
23661 for (i = 0; i < nregs; i++)
23663 /* Calculate index to next subword. */
23668 /* If compiler already emitted move of first word by
23669 store with update, no need to do anything. */
23670 if (j == 0 && used_update)
23673 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23674 j * reg_mode_size),
23675 simplify_gen_subreg (reg_mode, src, mode,
23676 j * reg_mode_size)));
23678 if (restore_basereg != NULL_RTX)
23679 emit_insn (restore_basereg);
23683 static GTY(()) alias_set_type set = -1;
23686 get_TOC_alias_set (void)
23689 set = new_alias_set ();
23693 /* Return the internal arg pointer used for function incoming
23694 arguments. When -fsplit-stack, the arg pointer is r12 so we need
23695 to copy it to a pseudo in order for it to be preserved over calls
23696 and suchlike. We'd really like to use a pseudo here for the
23697 internal arg pointer but data-flow analysis is not prepared to
23698 accept pseudos as live at the beginning of a function. */
23701 rs6000_internal_arg_pointer (void)
23703 if (flag_split_stack
23704 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
23708 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
23712 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
23713 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
23715 /* Put the pseudo initialization right after the note at the
23716 beginning of the function. */
23717 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
23718 gen_rtx_REG (Pmode, 12));
23719 push_topmost_sequence ();
23720 emit_insn_after (pat, get_insns ());
23721 pop_topmost_sequence ();
23723 rtx ret = plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
23724 FIRST_PARM_OFFSET (current_function_decl));
23725 return copy_to_reg (ret);
23727 return virtual_incoming_args_rtx;
23730 /* We may have to tell the dataflow pass that the split stack prologue
23731 is initializing a register. */
23734 rs6000_live_on_entry (bitmap regs)
23736 if (flag_split_stack)
23737 bitmap_set_bit (regs, 12);
23741 /* A C compound statement that outputs the assembler code for a thunk
23742 function, used to implement C++ virtual function calls with
23743 multiple inheritance. The thunk acts as a wrapper around a virtual
23744 function, adjusting the implicit object parameter before handing
23745 control off to the real function.
23747 First, emit code to add the integer DELTA to the location that
23748 contains the incoming first argument. Assume that this argument
23749 contains a pointer, and is the one used to pass the `this' pointer
23750 in C++. This is the incoming argument *before* the function
23751 prologue, e.g. `%o0' on a sparc. The addition must preserve the
23752 values of all other incoming arguments.
23754 After the addition, emit code to jump to FUNCTION, which is a
23755 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
23756 not touch the return address. Hence returning from FUNCTION will
23757 return to whoever called the current `thunk'.
23759 The effect must be as if FUNCTION had been called directly with the
23760 adjusted first argument. This macro is responsible for emitting
23761 all of the code for a thunk function; output_function_prologue()
23762 and output_function_epilogue() are not invoked.
23764 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
23765 been extracted from it.) It might possibly be useful on some
23766 targets, but probably not.
23768 If you do not define this macro, the target-independent code in the
23769 C++ frontend will generate a less efficient heavyweight thunk that
23770 calls FUNCTION instead of jumping to it. The generic approach does
23771 not support varargs. */
23774 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
23775 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
23778 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
23779 rtx this_rtx, funexp;
23782 reload_completed = 1;
23783 epilogue_completed = 1;
23785 /* Mark the end of the (empty) prologue. */
23786 emit_note (NOTE_INSN_PROLOGUE_END);
23788 /* Find the "this" pointer. If the function returns a structure,
23789 the structure return pointer is in r3. */
23790 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
23791 this_rtx = gen_rtx_REG (Pmode, 4);
23793 this_rtx = gen_rtx_REG (Pmode, 3);
23795 /* Apply the constant offset, if required. */
23797 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
23799 /* Apply the offset from the vtable, if required. */
23802 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
23803 rtx tmp = gen_rtx_REG (Pmode, 12);
23805 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
23806 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
23808 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
23809 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
23813 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
23815 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
23817 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
23820 /* Generate a tail call to the target function. */
23821 if (!TREE_USED (function))
23823 assemble_external (function);
23824 TREE_USED (function) = 1;
23826 funexp = XEXP (DECL_RTL (function), 0);
23827 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
23830 if (MACHOPIC_INDIRECT)
23831 funexp = machopic_indirect_call_target (funexp);
23834 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
23835 generate sibcall RTL explicitly. */
23836 insn = emit_call_insn (
23837 gen_rtx_PARALLEL (VOIDmode,
23839 gen_rtx_CALL (VOIDmode,
23840 funexp, const0_rtx),
23841 gen_rtx_USE (VOIDmode, const0_rtx),
23842 simple_return_rtx)));
23843 SIBLING_CALL_P (insn) = 1;
23846 /* Run just enough of rest_of_compilation to get the insns emitted.
23847 There's not really enough bulk here to make other passes such as
23848 instruction scheduling worthwhile. */
23849 insn = get_insns ();
23850 shorten_branches (insn);
23851 assemble_start_function (thunk_fndecl, fnname);
23852 final_start_function (insn, file, 1);
23853 final (insn, file, 1);
23854 final_end_function ();
23855 assemble_end_function (thunk_fndecl, fnname);
23857 reload_completed = 0;
23858 epilogue_completed = 0;
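/* Illustrative output for a simple thunk with DELTA = -8 and no vcall
   offset (a sketch; the exact sequence depends on the ABI):

	addi 3,3,-8
	b    <real function>

   i.e. adjust the incoming "this" in r3 and tail-jump.  */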
23861 /* A quick summary of the various types of 'constant-pool tables':

23864    Target      Flags          Name             One table per
23865    AIX         (none)         AIX TOC          object file
23866    AIX         -mfull-toc     AIX TOC          object file
23867    AIX         -mminimal-toc  AIX minimal TOC  translation unit
23868    SVR4/EABI   (none)         SVR4 SDATA       object file
23869    SVR4/EABI   -fpic          SVR4 pic         object file
23870    SVR4/EABI   -fPIC          SVR4 PIC         translation unit
23871    SVR4/EABI   -mrelocatable  EABI TOC         function
23872    SVR4/EABI   -maix          AIX TOC          object file
23873    SVR4/EABI   -maix -mminimal-toc
23874                               AIX minimal TOC  translation unit

23876    Name             Reg.  Set by  Entries made by  addrs?  fp?      sum?
23879    AIX TOC          2     crt0    as               Y       option   option
23880    AIX minimal TOC  30    prolog  gcc              Y       Y        option
23881    SVR4 SDATA       13    crt0    gcc              N       Y        N
23882    SVR4 pic         30    prolog  ld               Y       not yet  N
23883    SVR4 PIC         30    prolog  gcc              Y       option   option
23884    EABI TOC         30    prolog  gcc              Y       option   option  */
23888 /* Hash functions for the hash table. */
23891 rs6000_hash_constant (rtx k)
23893 enum rtx_code code = GET_CODE (k);
23894 machine_mode mode = GET_MODE (k);
23895 unsigned result = (code << 3) ^ mode;
23896 const char *format;
23899 format = GET_RTX_FORMAT (code);
23900 flen = strlen (format);
23906 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
23908 case CONST_WIDE_INT:
23911 flen = CONST_WIDE_INT_NUNITS (k);
23912 for (i = 0; i < flen; i++)
23913 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
23918 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
23928 for (; fidx < flen; fidx++)
23929 switch (format[fidx])
23934 const char *str = XSTR (k, fidx);
23935 len = strlen (str);
23936 result = result * 613 + len;
23937 for (i = 0; i < len; i++)
23938 result = result * 613 + (unsigned) str[i];
23943 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
23947 result = result * 613 + (unsigned) XINT (k, fidx);
23950 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
23951 result = result * 613 + (unsigned) XWINT (k, fidx);
23955 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
23956 result = result * 613 + (unsigned) (XWINT (k, fidx)
23963 gcc_unreachable ();
23970 toc_hasher::hash (toc_hash_struct *thc)
23972 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
23975 /* Compare H1 and H2 for equivalence. */
23978 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
23983 if (h1->key_mode != h2->key_mode)
23986 return rtx_equal_p (r1, r2);
23989 /* These are the names given by the C++ front-end to vtables, and
23990 vtable-like objects. Ideally, this logic should not be here;
23991 instead, there should be some programmatic way of inquiring as
23992 to whether or not an object is a vtable. */
23994 #define VTABLE_NAME_P(NAME) \
23995 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
23996 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
23997 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
23998 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
23999 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
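/* For illustration: the Itanium C++ ABI mangles the vtable of class B as
   "_ZTV1B", its VTT as "_ZTT1B", its typeinfo as "_ZTI1B", and
   construction vtables with the "_ZTC" prefix; "_vt." is the older
   GCC 2 mangling scheme.  */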
24001 #ifdef NO_DOLLAR_IN_LABEL
24002 /* Return a GGC-allocated character string translating dollar signs in
24003 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
24006 rs6000_xcoff_strip_dollar (const char *name)
24012 q = (const char *) strchr (name, '$');
24014 if (q == 0 || q == name)
24017 len = strlen (name);
24018 strip = XALLOCAVEC (char, len + 1);
24019 strcpy (strip, name);
24020 p = strip + (q - name);
24024 p = strchr (p + 1, '$');
24027 return ggc_alloc_string (strip, len);
24032 rs6000_output_symbol_ref (FILE *file, rtx x)
24034 const char *name = XSTR (x, 0);
24036 /* Currently C++ toc references to vtables can be emitted before it
24037 is decided whether the vtable is public or private. If this is
24038 the case, then the linker will eventually complain that there is
24039 a reference to an unknown section. Thus, for vtables only,
24040 we emit the TOC reference to reference the identifier and not the symbol. */
24042 if (VTABLE_NAME_P (name))
24044 RS6000_OUTPUT_BASENAME (file, name);
24047 assemble_name (file, name);
24050 /* Output a TOC entry. We derive the entry name from what is being written. */
24054 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
24057 const char *name = buf;
24059 HOST_WIDE_INT offset = 0;
24061 gcc_assert (!TARGET_NO_TOC);
24063 /* When the linker won't eliminate them, don't output duplicate
24064 TOC entries (this happens on AIX if there is any kind of TOC,
24065 and on SVR4 under -fPIC or -mrelocatable). Don't do this for labels. */
24067 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
24069 struct toc_hash_struct *h;
24071 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
24072 time because GGC is not initialized at that point. */
24073 if (toc_hash_table == NULL)
24074 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
24076 h = ggc_alloc<toc_hash_struct> ();
24078 h->key_mode = mode;
24079 h->labelno = labelno;
24081 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
24082 if (*found == NULL)
24084 else /* This is indeed a duplicate.
24085 Set this label equal to that label. */
24087 fputs ("\t.set ", file);
24088 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
24089 fprintf (file, "%d,", labelno);
24090 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
24091 fprintf (file, "%d\n", ((*found)->labelno));
24094 if (TARGET_XCOFF && SYMBOL_REF_P (x)
24095 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
24096 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
24098 fputs ("\t.set ", file);
24099 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
24100 fprintf (file, "%d,", labelno);
24101 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
24102 fprintf (file, "%d\n", ((*found)->labelno));
24109 /* If we're going to put a double constant in the TOC, make sure it's
24110 aligned properly when strict alignment is on. */
24111 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
24112 && STRICT_ALIGNMENT
24113 && GET_MODE_BITSIZE (mode) >= 64
24114 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
24115 ASM_OUTPUT_ALIGN (file, 3);
24118 (*targetm.asm_out.internal_label) (file, "LC", labelno);
24120 /* Handle FP constants specially. Note that if we have a minimal
24121 TOC, things we put here aren't actually in the TOC, so we can allow FP constants. */
24123 if (CONST_DOUBLE_P (x)
24124 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
24125 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
24129 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24130 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
24132 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
24136 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24137 fputs (DOUBLE_INT_ASM_OP, file);
24139 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
24140 k[0] & 0xffffffff, k[1] & 0xffffffff,
24141 k[2] & 0xffffffff, k[3] & 0xffffffff);
24142 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
24143 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
24144 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
24145 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
24146 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
24151 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24152 fputs ("\t.long ", file);
24154 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
24155 k[0] & 0xffffffff, k[1] & 0xffffffff,
24156 k[2] & 0xffffffff, k[3] & 0xffffffff);
24157 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
24158 k[0] & 0xffffffff, k[1] & 0xffffffff,
24159 k[2] & 0xffffffff, k[3] & 0xffffffff);
24163 else if (CONST_DOUBLE_P (x)
24164 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
24168 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24169 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
24171 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
24175 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24176 fputs (DOUBLE_INT_ASM_OP, file);
24178 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
24179 k[0] & 0xffffffff, k[1] & 0xffffffff);
24180 fprintf (file, "0x%lx%08lx\n",
24181 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
24182 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
24187 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24188 fputs ("\t.long ", file);
24190 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
24191 k[0] & 0xffffffff, k[1] & 0xffffffff);
24192 fprintf (file, "0x%lx,0x%lx\n",
24193 k[0] & 0xffffffff, k[1] & 0xffffffff);
24197 else if (CONST_DOUBLE_P (x)
24198 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
24202 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24203 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
24205 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
24209 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24210 fputs (DOUBLE_INT_ASM_OP, file);
24212 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
24213 if (WORDS_BIG_ENDIAN)
24214 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
24216 fprintf (file, "0x%lx\n", l & 0xffffffff);
24221 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24222 fputs ("\t.long ", file);
24224 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
24225 fprintf (file, "0x%lx\n", l & 0xffffffff);
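/* Example of the output (illustrative): for the DFmode constant 1.0 on a
   64-bit ELF target the DFmode arm above emits roughly

	.LC5:
		.quad 0x3ff0000000000000

   while the AIX/XCOFF path instead produces a real TOC entry such as
	.tc FD_3ff00000_0[TC],0x3ff0000000000000  */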
24229 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
24231 unsigned HOST_WIDE_INT low;
24232 HOST_WIDE_INT high;
24234 low = INTVAL (x) & 0xffffffff;
24235 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
24237 /* TOC entries are always Pmode-sized, so when big-endian,
24238 smaller integer constants in the TOC need to be padded.
24239 (This is still a win over putting the constants in
24240 a separate constant pool, because then we'd have
24241 to have both a TOC entry _and_ the actual constant.)
24243 For a 32-bit target, CONST_INT values are loaded and shifted
24244 entirely within `low' and can be stored in one TOC entry. */
24246 /* It would be easy to make this work, but it doesn't now. */
24247 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
24249 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
24252 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
24253 high = (HOST_WIDE_INT) low >> 32;
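/* A worked example (sketch, assuming a big-endian 64-bit target):
   for the 32-bit SImode constant 0x1234, POINTER_SIZE -
   GET_MODE_BITSIZE (mode) is 32, so 'low' becomes 0x123400000000 and
   'high' becomes 0x1234; the emitted 64-bit entry then carries the
   constant in its most significant word, which is what a big-endian
   32-bit load of the TOC slot expects. */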
24259 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24260 fputs (DOUBLE_INT_ASM_OP, file);
24262 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
24263 (long) high & 0xffffffff, (long) low & 0xffffffff);
24264 fprintf (file, "0x%lx%08lx\n",
24265 (long) high & 0xffffffff, (long) low & 0xffffffff);
24270 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
24272 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24273 fputs ("\t.long ", file);
24275 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
24276 (long) high & 0xffffffff, (long) low & 0xffffffff);
24277 fprintf (file, "0x%lx,0x%lx\n",
24278 (long) high & 0xffffffff, (long) low & 0xffffffff);
24282 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24283 fputs ("\t.long ", file);
24285 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
24286 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
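/* For illustration (a sketch, not from the original source): the
   SImode constant 42 would come out of this path as
   ".tc IS_2a[TC],0x2a" on AIX, or as "\t.long 0x2a" when TARGET_ELF
   or TARGET_MINIMAL_TOC replaces the .tc directive. */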
24292 if (GET_CODE (x) == CONST)
24294 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
24295 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
24297 base = XEXP (XEXP (x, 0), 0);
24298 offset = INTVAL (XEXP (XEXP (x, 0), 1));
24301 switch (GET_CODE (base))
24304 name = XSTR (base, 0);
24308 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
24309 CODE_LABEL_NUMBER (XEXP (base, 0)));
24313 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
24317 gcc_unreachable ();
24320 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24321 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
24324 fputs ("\t.tc ", file);
24325 RS6000_OUTPUT_BASENAME (file, name);
24328 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
24330 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
24332 /* Mark large TOC symbols on AIX with [TE] so they are mapped
24333 after other TOC symbols, reducing overflow of small TOC access
24334 to [TC] symbols. */
24335 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
24336 ? "[TE]," : "[TC],", file);
24339 /* Currently C++ toc references to vtables can be emitted before it
24340 is decided whether the vtable is public or private. If this is
24341 the case, then the linker will eventually complain that there is
24342 a TOC reference to an unknown section. Thus, for vtables only,
24343 we emit the TOC reference to reference the symbol and not the section. */
24345 if (VTABLE_NAME_P (name))
24347 RS6000_OUTPUT_BASENAME (file, name);
24349 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
24350 else if (offset > 0)
24351 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
24354 output_addr_const (file, x);
24357 if (TARGET_XCOFF && SYMBOL_REF_P (base))
24359 switch (SYMBOL_REF_TLS_MODEL (base))
24363 case TLS_MODEL_LOCAL_EXEC:
24364 fputs ("@le", file);
24366 case TLS_MODEL_INITIAL_EXEC:
24367 fputs ("@ie", file);
24369 /* Use global-dynamic for local-dynamic. */
24370 case TLS_MODEL_GLOBAL_DYNAMIC:
24371 case TLS_MODEL_LOCAL_DYNAMIC:
24373 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
24374 fputs ("\t.tc .", file);
24375 RS6000_OUTPUT_BASENAME (file, name);
24376 fputs ("[TC],", file);
24377 output_addr_const (file, x);
24378 fputs ("@m", file);
24381 gcc_unreachable ();
24389 /* Output an assembler pseudo-op to write an ASCII string of N characters
24390 starting at P to FILE.
24392 On the RS/6000, we have to do this using the .byte operation and
24393 write out special characters outside the quoted string.
24394 Also, the assembler is broken; very long strings are truncated,
24395 so we must artificially break them up early. */
24398 output_ascii (FILE *file, const char *p, int n)
24401 int i, count_string;
24402 const char *for_string = "\t.byte \"";
24403 const char *for_decimal = "\t.byte ";
24404 const char *to_close = NULL;
24407 for (i = 0; i < n; i++)
24410 if (c >= ' ' && c < 0177)
24413 fputs (for_string, file);
24416 /* Write two quotes to get one. */
24424 for_decimal = "\"\n\t.byte ";
24428 if (count_string >= 512)
24430 fputs (to_close, file);
24432 for_string = "\t.byte \"";
24433 for_decimal = "\t.byte ";
24441 fputs (for_decimal, file);
24442 fprintf (file, "%d", c);
24444 for_string = "\n\t.byte \"";
24445 for_decimal = ", ";
24451 /* Now close the string if we have written one. Then end the line. */
24453 fputs (to_close, file);
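/* A hedged usage sketch (not from the original source):
   output_ascii (file, "A\"B\n", 4) would emit something like

       .byte "A""B"
       .byte 10

   printable characters are grouped inside a quoted string (with ""
   escaping an embedded quote), and the newline falls back to a
   decimal .byte operand. */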
24456 /* Generate a unique section name for FILENAME for a section type
24457 represented by SECTION_DESC. Output goes into BUF.
24459 SECTION_DESC can be any string, as long as it is different for each
24460 possible section type.
24462 We name the section in the same manner as xlc. The name begins with an
24463 underscore followed by the filename (after stripping any leading directory
24464 names) with the last period replaced by the string SECTION_DESC. If
24465 FILENAME does not contain a period, SECTION_DESC is appended to the end of FILENAME. */
24469 rs6000_gen_section_name (char **buf, const char *filename,
24470 const char *section_desc)
24472 const char *q, *after_last_slash, *last_period = 0;
24476 after_last_slash = filename;
24477 for (q = filename; *q; q++)
24480 after_last_slash = q + 1;
24481 else if (*q == '.')
24485 len = strlen (after_last_slash) + strlen (section_desc) + 2;
24486 *buf = (char *) xmalloc (len);
24491 for (q = after_last_slash; *q; q++)
24493 if (q == last_period)
24495 strcpy (p, section_desc);
24496 p += strlen (section_desc);
24500 else if (ISALNUM (*q))
24504 if (last_period == 0)
24505 strcpy (p, section_desc);
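/* Example (sketch): rs6000_gen_section_name (&buf, "dir/foo.c", ".bss")
   strips the directory, replaces the last period and everything after
   it, and yields "_foo.bss"; a filename with no period would simply
   get ".bss" appended instead. */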
24510 /* Emit profile function. */
24513 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
24515 /* Non-standard profiling for kernels, which just saves LR then calls
24516 _mcount without worrying about arg saves. The idea is to change
24517 the function prologue as little as possible as it isn't easy to
24518 account for arg save/restore code added just for _mcount. */
24519 if (TARGET_PROFILE_KERNEL)
24522 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24524 #ifndef NO_PROFILE_COUNTERS
24525 # define NO_PROFILE_COUNTERS 0
24527 if (NO_PROFILE_COUNTERS)
24528 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
24529 LCT_NORMAL, VOIDmode);
24533 const char *label_name;
24536 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
24537 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
24538 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
24540 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
24541 LCT_NORMAL, VOIDmode, fun, Pmode);
24544 else if (DEFAULT_ABI == ABI_DARWIN)
24546 const char *mcount_name = RS6000_MCOUNT;
24547 int caller_addr_regno = LR_REGNO;
24549 /* Be conservative and always set this, at least for now. */
24550 crtl->uses_pic_offset_table = 1;
24553 /* For PIC code, set up a stub and collect the caller's address
24554 from r0, which is where the prologue puts it. */
24555 if (MACHOPIC_INDIRECT
24556 && crtl->uses_pic_offset_table)
24557 caller_addr_regno = 0;
24559 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
24560 LCT_NORMAL, VOIDmode,
24561 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
24565 /* Write function profiler code. */
24568 output_function_profiler (FILE *file, int labelno)
24572 switch (DEFAULT_ABI)
24575 gcc_unreachable ();
24580 warning (0, "no profiling of 64-bit code for this ABI");
24583 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
24584 fprintf (file, "\tmflr %s\n", reg_names[0]);
24585 if (NO_PROFILE_COUNTERS)
24587 asm_fprintf (file, "\tstw %s,4(%s)\n",
24588 reg_names[0], reg_names[1]);
24590 else if (TARGET_SECURE_PLT && flag_pic)
24592 if (TARGET_LINK_STACK)
24595 get_ppc476_thunk_name (name);
24596 asm_fprintf (file, "\tbl %s\n", name);
24599 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
24600 asm_fprintf (file, "\tstw %s,4(%s)\n",
24601 reg_names[0], reg_names[1]);
24602 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
24603 asm_fprintf (file, "\taddis %s,%s,",
24604 reg_names[12], reg_names[12]);
24605 assemble_name (file, buf);
24606 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
24607 assemble_name (file, buf);
24608 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
24610 else if (flag_pic == 1)
24612 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
24613 asm_fprintf (file, "\tstw %s,4(%s)\n",
24614 reg_names[0], reg_names[1]);
24615 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
24616 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
24617 assemble_name (file, buf);
24618 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
24620 else if (flag_pic > 1)
24622 asm_fprintf (file, "\tstw %s,4(%s)\n",
24623 reg_names[0], reg_names[1]);
24624 /* Now, we need to get the address of the label. */
24625 if (TARGET_LINK_STACK)
24628 get_ppc476_thunk_name (name);
24629 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
24630 assemble_name (file, buf);
24631 fputs ("-.\n1:", file);
24632 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
24633 asm_fprintf (file, "\taddi %s,%s,4\n",
24634 reg_names[11], reg_names[11]);
24638 fputs ("\tbcl 20,31,1f\n\t.long ", file);
24639 assemble_name (file, buf);
24640 fputs ("-.\n1:", file);
24641 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
24643 asm_fprintf (file, "\tlwz %s,0(%s)\n",
24644 reg_names[0], reg_names[11]);
24645 asm_fprintf (file, "\tadd %s,%s,%s\n",
24646 reg_names[0], reg_names[0], reg_names[11]);
24650 asm_fprintf (file, "\tlis %s,", reg_names[12]);
24651 assemble_name (file, buf);
24652 fputs ("@ha\n", file);
24653 asm_fprintf (file, "\tstw %s,4(%s)\n",
24654 reg_names[0], reg_names[1]);
24655 asm_fprintf (file, "\tla %s,", reg_names[0]);
24656 assemble_name (file, buf);
24657 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
24660 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
24661 fprintf (file, "\tbl %s%s\n",
24662 RS6000_MCOUNT, flag_pic ? "@plt" : "");
24668 /* Don't do anything, done in output_profile_hook (). */
24675 /* The following variable holds the last issued insn. */
24677 static rtx_insn *last_scheduled_insn;
24679 /* The following variable helps to balance the issuing of load and
24680 store instructions. */
24682 static int load_store_pendulum;
24684 /* The following variable helps pair divide insns during scheduling. */
24685 static int divide_cnt;
24686 /* The following variable helps pair and alternate vector and vector load
24687 insns during scheduling. */
24688 static int vec_pairing;
24691 /* Power4 load update and store update instructions are cracked into a
24692 load or store and an integer insn which are executed in the same cycle.
24693 Branches have their own dispatch slot which does not count against the
24694 GCC issue rate, but it changes the program flow so there are no other
24695 instructions to issue in this cycle. */
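/* For instance (a sketch of the logic below, not new behavior): on a
   dispatch-group target a cracked insn occupies two issue slots, so
   with MORE == 4 the function returns 2, while a microcoded insn
   terminates the group and the function returns 0. */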
24698 rs6000_variable_issue_1 (rtx_insn *insn, int more)
24700 last_scheduled_insn = insn;
24701 if (GET_CODE (PATTERN (insn)) == USE
24702 || GET_CODE (PATTERN (insn)) == CLOBBER)
24704 cached_can_issue_more = more;
24705 return cached_can_issue_more;
24708 if (insn_terminates_group_p (insn, current_group))
24710 cached_can_issue_more = 0;
24711 return cached_can_issue_more;
24714 /* If the insn has no reservation but we still reach here, don't adjust the issue count. */
24715 if (recog_memoized (insn) < 0)
24718 if (rs6000_sched_groups)
24720 if (is_microcoded_insn (insn))
24721 cached_can_issue_more = 0;
24722 else if (is_cracked_insn (insn))
24723 cached_can_issue_more = more > 2 ? more - 2 : 0;
24725 cached_can_issue_more = more - 1;
24727 return cached_can_issue_more;
24730 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
24733 cached_can_issue_more = more - 1;
24734 return cached_can_issue_more;
24738 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
24740 int r = rs6000_variable_issue_1 (insn, more);
24742 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
24746 /* Adjust the cost of a scheduling dependency. Return the new cost of
24747 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
24750 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
24753 enum attr_type attr_type;
24755 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
24762 /* Data dependency; DEP_INSN writes a register that INSN reads
24763 some cycles later. */
24765 /* Separate a load from a narrower, dependent store. */
24766 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
24767 || rs6000_tune == PROCESSOR_FUTURE)
24768 && GET_CODE (PATTERN (insn)) == SET
24769 && GET_CODE (PATTERN (dep_insn)) == SET
24770 && MEM_P (XEXP (PATTERN (insn), 1))
24771 && MEM_P (XEXP (PATTERN (dep_insn), 0))
24772 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
24773 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
24776 attr_type = get_attr_type (insn);
24781 /* Tell the first scheduling pass about the latency between
24782 a mtctr and bctr (and mtlr and br/blr). The first
24783 scheduling pass will not know about this latency since
24784 the mtctr instruction, which has the latency associated
24785 to it, will be generated by reload. */
24788 /* Leave some extra cycles between a compare and its
24789 dependent branch, to inhibit expensive mispredicts. */
24790 if ((rs6000_tune == PROCESSOR_PPC603
24791 || rs6000_tune == PROCESSOR_PPC604
24792 || rs6000_tune == PROCESSOR_PPC604e
24793 || rs6000_tune == PROCESSOR_PPC620
24794 || rs6000_tune == PROCESSOR_PPC630
24795 || rs6000_tune == PROCESSOR_PPC750
24796 || rs6000_tune == PROCESSOR_PPC7400
24797 || rs6000_tune == PROCESSOR_PPC7450
24798 || rs6000_tune == PROCESSOR_PPCE5500
24799 || rs6000_tune == PROCESSOR_PPCE6500
24800 || rs6000_tune == PROCESSOR_POWER4
24801 || rs6000_tune == PROCESSOR_POWER5
24802 || rs6000_tune == PROCESSOR_POWER7
24803 || rs6000_tune == PROCESSOR_POWER8
24804 || rs6000_tune == PROCESSOR_POWER9
24805 || rs6000_tune == PROCESSOR_FUTURE
24806 || rs6000_tune == PROCESSOR_CELL)
24807 && recog_memoized (dep_insn)
24808 && (INSN_CODE (dep_insn) >= 0))
24810 switch (get_attr_type (dep_insn))
24813 case TYPE_FPCOMPARE:
24814 case TYPE_CR_LOGICAL:
24818 if (get_attr_dot (dep_insn) == DOT_YES)
24823 if (get_attr_dot (dep_insn) == DOT_YES
24824 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
24835 if ((rs6000_tune == PROCESSOR_POWER6)
24836 && recog_memoized (dep_insn)
24837 && (INSN_CODE (dep_insn) >= 0))
24840 if (GET_CODE (PATTERN (insn)) != SET)
24841 /* If this happens, we have to extend this to schedule
24842 optimally. Return default for now. */
24845 /* Adjust the cost for the case where the value written
24846 by a fixed point operation is used as the address
24847 gen value on a store. */
24848 switch (get_attr_type (dep_insn))
24853 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24854 return get_attr_sign_extend (dep_insn)
24855 == SIGN_EXTEND_YES ? 6 : 4;
24860 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24861 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
24871 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24879 if (get_attr_update (dep_insn) == UPDATE_YES
24880 && ! rs6000_store_data_bypass_p (dep_insn, insn))
24886 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24892 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24893 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
24903 if ((rs6000_tune == PROCESSOR_POWER6)
24904 && recog_memoized (dep_insn)
24905 && (INSN_CODE (dep_insn) >= 0))
24908 /* Adjust the cost for the case where the value written
24909 by a fixed point instruction is used within the address
24910 gen portion of a subsequent load(u)(x) */
24911 switch (get_attr_type (dep_insn))
24916 if (set_to_load_agen (dep_insn, insn))
24917 return get_attr_sign_extend (dep_insn)
24918 == SIGN_EXTEND_YES ? 6 : 4;
24923 if (set_to_load_agen (dep_insn, insn))
24924 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
24934 if (set_to_load_agen (dep_insn, insn))
24942 if (get_attr_update (dep_insn) == UPDATE_YES
24943 && set_to_load_agen (dep_insn, insn))
24949 if (set_to_load_agen (dep_insn, insn))
24955 if (set_to_load_agen (dep_insn, insn))
24956 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
24966 if ((rs6000_tune == PROCESSOR_POWER6)
24967 && get_attr_update (insn) == UPDATE_NO
24968 && recog_memoized (dep_insn)
24969 && (INSN_CODE (dep_insn) >= 0)
24970 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
24977 /* Fall out to return default cost. */
24981 case REG_DEP_OUTPUT:
24982 /* Output dependency; DEP_INSN writes a register that INSN writes some cycles later. */
24984 if ((rs6000_tune == PROCESSOR_POWER6)
24985 && recog_memoized (dep_insn)
24986 && (INSN_CODE (dep_insn) >= 0))
24988 attr_type = get_attr_type (insn);
24993 case TYPE_FPSIMPLE:
24994 if (get_attr_type (dep_insn) == TYPE_FP
24995 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
24999 if (get_attr_update (insn) == UPDATE_NO
25000 && get_attr_type (dep_insn) == TYPE_MFFGPR)
25007 /* Fall through, no cost for output dependency. */
25011 /* Anti dependency; DEP_INSN reads a register that INSN writes some cycles later. */
25016 gcc_unreachable ();
25022 /* Debug version of rs6000_adjust_cost. */
25025 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
25026 int cost, unsigned int dw)
25028 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
25036 default: dep = "unknown dependency"; break;
25037 case REG_DEP_TRUE: dep = "data dependency"; break;
25038 case REG_DEP_OUTPUT: dep = "output dependency"; break;
25039 case REG_DEP_ANTI: dep = "anti dependency"; break;
25043 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
25044 "%s, insn:\n", ret, cost, dep);
25052 /* The function returns true if INSN is microcoded.
25053 Return false otherwise. */
25056 is_microcoded_insn (rtx_insn *insn)
25058 if (!insn || !NONDEBUG_INSN_P (insn)
25059 || GET_CODE (PATTERN (insn)) == USE
25060 || GET_CODE (PATTERN (insn)) == CLOBBER)
25063 if (rs6000_tune == PROCESSOR_CELL)
25064 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
25066 if (rs6000_sched_groups
25067 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
25069 enum attr_type type = get_attr_type (insn);
25070 if ((type == TYPE_LOAD
25071 && get_attr_update (insn) == UPDATE_YES
25072 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
25073 || ((type == TYPE_LOAD || type == TYPE_STORE)
25074 && get_attr_update (insn) == UPDATE_YES
25075 && get_attr_indexed (insn) == INDEXED_YES)
25076 || type == TYPE_MFCR)
25083 /* The function returns true if INSN is cracked into 2 instructions
25084 by the processor (and therefore occupies 2 issue slots). */
25087 is_cracked_insn (rtx_insn *insn)
25089 if (!insn || !NONDEBUG_INSN_P (insn)
25090 || GET_CODE (PATTERN (insn)) == USE
25091 || GET_CODE (PATTERN (insn)) == CLOBBER)
25094 if (rs6000_sched_groups
25095 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
25097 enum attr_type type = get_attr_type (insn);
25098 if ((type == TYPE_LOAD
25099 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
25100 && get_attr_update (insn) == UPDATE_NO)
25101 || (type == TYPE_LOAD
25102 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
25103 && get_attr_update (insn) == UPDATE_YES
25104 && get_attr_indexed (insn) == INDEXED_NO)
25105 || (type == TYPE_STORE
25106 && get_attr_update (insn) == UPDATE_YES
25107 && get_attr_indexed (insn) == INDEXED_NO)
25108 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
25109 && get_attr_update (insn) == UPDATE_YES)
25110 || (type == TYPE_CR_LOGICAL
25111 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
25112 || (type == TYPE_EXTS
25113 && get_attr_dot (insn) == DOT_YES)
25114 || (type == TYPE_SHIFT
25115 && get_attr_dot (insn) == DOT_YES
25116 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
25117 || (type == TYPE_MUL
25118 && get_attr_dot (insn) == DOT_YES)
25119 || type == TYPE_DIV
25120 || (type == TYPE_INSERT
25121 && get_attr_size (insn) == SIZE_32))
25128 /* The function returns true if INSN can be issued only from
25129 the branch slot. */
25132 is_branch_slot_insn (rtx_insn *insn)
25134 if (!insn || !NONDEBUG_INSN_P (insn)
25135 || GET_CODE (PATTERN (insn)) == USE
25136 || GET_CODE (PATTERN (insn)) == CLOBBER)
25139 if (rs6000_sched_groups)
25141 enum attr_type type = get_attr_type (insn);
25142 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
25150 /* The function returns true if out_insn sets a value that is
25151 used in the address generation computation of in_insn. */
25153 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
25155 rtx out_set, in_set;
25157 /* For performance reasons, only handle the simple case where
25158 both insns are a single_set. */
25159 out_set = single_set (out_insn);
25162 in_set = single_set (in_insn);
25164 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
25170 /* Try to determine base/offset/size parts of the given MEM.
25171 Return true if successful, false if all the values couldn't be determined.
25174 This function only looks for REG or REG+CONST address forms.
25175 REG+REG address form will return false. */
25178 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
25179 HOST_WIDE_INT *size)
25182 if (MEM_SIZE_KNOWN_P (mem))
25183 *size = MEM_SIZE (mem);
25187 addr_rtx = (XEXP (mem, 0));
25188 if (GET_CODE (addr_rtx) == PRE_MODIFY)
25189 addr_rtx = XEXP (addr_rtx, 1);
25192 while (GET_CODE (addr_rtx) == PLUS
25193 && CONST_INT_P (XEXP (addr_rtx, 1)))
25195 *offset += INTVAL (XEXP (addr_rtx, 1));
25196 addr_rtx = XEXP (addr_rtx, 0);
25198 if (!REG_P (addr_rtx))
25205 /* Return true if the target storage location of mem1 is adjacent
25206 to the target storage location of mem2. */
25210 adjacent_mem_locations (rtx mem1, rtx mem2)
25213 HOST_WIDE_INT off1, size1, off2, size2;
25215 if (get_memref_parts (mem1, &reg1, &off1, &size1)
25216 && get_memref_parts (mem2, &reg2, &off2, &size2))
25217 return ((REGNO (reg1) == REGNO (reg2))
25218 && ((off1 + size1 == off2)
25219 || (off2 + size2 == off1)));
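/* Worked example (sketch): two SImode MEMs at (reg 9)+8 and (reg 9)+12
   each have size 4, so off1 + size1 == off2 and they are reported as
   adjacent; a REG+REG address makes get_memref_parts fail, so such
   pairs are conservatively treated as not adjacent. */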
25224 /* This function returns true if it can be determined that the two MEM
25225 locations overlap by at least 1 byte based on base reg/offset/size. */
25228 mem_locations_overlap (rtx mem1, rtx mem2)
25231 HOST_WIDE_INT off1, size1, off2, size2;
25233 if (get_memref_parts (mem1, &reg1, &off1, &size1)
25234 && get_memref_parts (mem2, &reg2, &off2, &size2))
25235 return ((REGNO (reg1) == REGNO (reg2))
25236 && (((off1 <= off2) && (off1 + size1 > off2))
25237 || ((off2 <= off1) && (off2 + size2 > off1))));
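/* Likewise (sketch): an 8-byte store at (reg 9)+8 overlaps a 4-byte
   load at (reg 9)+12, since off1 <= off2 and off1 + size1 > off2. */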
25242 /* A C statement (sans semicolon) to update the integer scheduling
25243 priority INSN_PRIORITY (INSN). Increase the priority to execute the
25244 INSN earlier, reduce the priority to execute INSN later. Do not
25245 define this macro if you do not need to adjust the scheduling
25246 priorities of insns. */
25249 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
25251 rtx load_mem, str_mem;
25252 /* On machines (like the 750) which have asymmetric integer units,
25253 where one integer unit can do multiply and divides and the other
25254 can't, reduce the priority of multiply/divide so it is scheduled
25255 before other integer operations. */
25258 if (! INSN_P (insn))
25261 if (GET_CODE (PATTERN (insn)) == USE)
25264 switch (rs6000_tune) {
25265 case PROCESSOR_PPC750:
25266 switch (get_attr_type (insn))
25273 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
25274 priority, priority);
25275 if (priority >= 0 && priority < 0x01000000)
25282 if (insn_must_be_first_in_group (insn)
25283 && reload_completed
25284 && current_sched_info->sched_max_insns_priority
25285 && rs6000_sched_restricted_insns_priority)
25288 /* Prioritize insns that can be dispatched only in the first dispatch slot. */
25290 if (rs6000_sched_restricted_insns_priority == 1)
25291 /* Attach highest priority to insn. This means that in
25292 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
25293 precede 'priority' (critical path) considerations. */
25294 return current_sched_info->sched_max_insns_priority;
25295 else if (rs6000_sched_restricted_insns_priority == 2)
25296 /* Increase priority of insn by a minimal amount. This means that in
25297 haifa-sched.c:ready_sort(), only 'priority' (critical path)
25298 considerations precede dispatch-slot restriction considerations. */
25299 return (priority + 1);
25302 if (rs6000_tune == PROCESSOR_POWER6
25303 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
25304 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
25305 /* Attach highest priority to insn if the scheduler has just issued two
25306 stores and this instruction is a load, or two loads and this instruction
25307 is a store. Power6 wants loads and stores scheduled alternately when possible. */
25309 return current_sched_info->sched_max_insns_priority;
25314 /* Return true if the instruction is nonpipelined on the Cell. */
25316 is_nonpipeline_insn (rtx_insn *insn)
25318 enum attr_type type;
25319 if (!insn || !NONDEBUG_INSN_P (insn)
25320 || GET_CODE (PATTERN (insn)) == USE
25321 || GET_CODE (PATTERN (insn)) == CLOBBER)
25324 type = get_attr_type (insn);
25325 if (type == TYPE_MUL
25326 || type == TYPE_DIV
25327 || type == TYPE_SDIV
25328 || type == TYPE_DDIV
25329 || type == TYPE_SSQRT
25330 || type == TYPE_DSQRT
25331 || type == TYPE_MFCR
25332 || type == TYPE_MFCRF
25333 || type == TYPE_MFJMPR)
25341 /* Return how many instructions the machine can issue per cycle. */
25344 rs6000_issue_rate (void)
25346 /* Unless scheduling for register pressure, use issue rate of 1 for
25347 first scheduling pass to decrease degradation. */
25348 if (!reload_completed && !flag_sched_pressure)
25351 switch (rs6000_tune) {
25352 case PROCESSOR_RS64A:
25353 case PROCESSOR_PPC601: /* ? */
25354 case PROCESSOR_PPC7450:
25356 case PROCESSOR_PPC440:
25357 case PROCESSOR_PPC603:
25358 case PROCESSOR_PPC750:
25359 case PROCESSOR_PPC7400:
25360 case PROCESSOR_PPC8540:
25361 case PROCESSOR_PPC8548:
25362 case PROCESSOR_CELL:
25363 case PROCESSOR_PPCE300C2:
25364 case PROCESSOR_PPCE300C3:
25365 case PROCESSOR_PPCE500MC:
25366 case PROCESSOR_PPCE500MC64:
25367 case PROCESSOR_PPCE5500:
25368 case PROCESSOR_PPCE6500:
25369 case PROCESSOR_TITAN:
25371 case PROCESSOR_PPC476:
25372 case PROCESSOR_PPC604:
25373 case PROCESSOR_PPC604e:
25374 case PROCESSOR_PPC620:
25375 case PROCESSOR_PPC630:
25377 case PROCESSOR_POWER4:
25378 case PROCESSOR_POWER5:
25379 case PROCESSOR_POWER6:
25380 case PROCESSOR_POWER7:
25382 case PROCESSOR_POWER8:
25384 case PROCESSOR_POWER9:
25385 case PROCESSOR_FUTURE:
25392 /* Return how many instructions to look ahead for better insn scheduling. */
25396 rs6000_use_sched_lookahead (void)
25398 switch (rs6000_tune)
25400 case PROCESSOR_PPC8540:
25401 case PROCESSOR_PPC8548:
25404 case PROCESSOR_CELL:
25405 return (reload_completed ? 8 : 0);
25412 /* We are choosing insn from the ready queue. Return zero if INSN can be chosen. */
25415 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
25417 if (ready_index == 0)
25420 if (rs6000_tune != PROCESSOR_CELL)
25423 gcc_assert (insn != NULL_RTX && INSN_P (insn));
25425 if (!reload_completed
25426 || is_nonpipeline_insn (insn)
25427 || is_microcoded_insn (insn))
25433 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
25434 and return true. */
25437 find_mem_ref (rtx pat, rtx *mem_ref)
25442 /* stack_tie does not produce any real memory traffic. */
25443 if (tie_operand (pat, VOIDmode))
25452 /* Recursively process the pattern. */
25453 fmt = GET_RTX_FORMAT (GET_CODE (pat));
25455 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
25459 if (find_mem_ref (XEXP (pat, i), mem_ref))
25462 else if (fmt[i] == 'E')
25463 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
25465 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
25473 /* Determine if PAT is a PATTERN of a load insn. */
25476 is_load_insn1 (rtx pat, rtx *load_mem)
25478 if (!pat || pat == NULL_RTX)
25481 if (GET_CODE (pat) == SET)
25482 return find_mem_ref (SET_SRC (pat), load_mem);
25484 if (GET_CODE (pat) == PARALLEL)
25488 for (i = 0; i < XVECLEN (pat, 0); i++)
25489 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
25496 /* Determine if INSN loads from memory. */
25499 is_load_insn (rtx insn, rtx *load_mem)
25501 if (!insn || !INSN_P (insn))
25507 return is_load_insn1 (PATTERN (insn), load_mem);
25510 /* Determine if PAT is a PATTERN of a store insn. */
25513 is_store_insn1 (rtx pat, rtx *str_mem)
25515 if (!pat || pat == NULL_RTX)
25518 if (GET_CODE (pat) == SET)
25519 return find_mem_ref (SET_DEST (pat), str_mem);
25521 if (GET_CODE (pat) == PARALLEL)
25525 for (i = 0; i < XVECLEN (pat, 0); i++)
25526 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
25533 /* Determine if INSN stores to memory. */
25536 is_store_insn (rtx insn, rtx *str_mem)
25538 if (!insn || !INSN_P (insn))
25541 return is_store_insn1 (PATTERN (insn), str_mem);
25544 /* Return whether TYPE is a Power9 pairable vector instruction type. */
25547 is_power9_pairable_vec_type (enum attr_type type)
25551 case TYPE_VECSIMPLE:
25552 case TYPE_VECCOMPLEX:
25556 case TYPE_VECFLOAT:
25558 case TYPE_VECDOUBLE:
25566 /* Returns whether the dependence between INSN and NEXT is considered
25567 costly by the given target. */
25570 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
25574 rtx load_mem, str_mem;
25576 /* If the flag is not enabled, no dependence is considered costly;
25577 allow all dependent insns in the same group.
25578 This is the most aggressive option. */
25579 if (rs6000_sched_costly_dep == no_dep_costly)
25582 /* If the flag is set to 1, a dependence is always considered costly;
25583 do not allow dependent instructions in the same group.
25584 This is the most conservative option. */
25585 if (rs6000_sched_costly_dep == all_deps_costly)
25588 insn = DEP_PRO (dep);
25589 next = DEP_CON (dep);
25591 if (rs6000_sched_costly_dep == store_to_load_dep_costly
25592 && is_load_insn (next, &load_mem)
25593 && is_store_insn (insn, &str_mem))
25594 /* Prevent load after store in the same group. */
25597 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
25598 && is_load_insn (next, &load_mem)
25599 && is_store_insn (insn, &str_mem)
25600 && DEP_TYPE (dep) == REG_DEP_TRUE
25601 && mem_locations_overlap(str_mem, load_mem))
25602 /* Prevent load after store in the same group if it is a true dependence. */
25606 /* The flag is set to X; dependences with latency >= X are considered costly,
25607 and will not be scheduled in the same group. */
25608 if (rs6000_sched_costly_dep <= max_dep_latency
25609 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
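/* Worked example (sketch): with -msched-costly-dep=3, a dependence of
   cost 4 at distance 0 satisfies (4 - 0) >= 3 and is treated as
   costly, so the two insns will not share a dispatch group. */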
25615 /* Return the next insn after INSN that is found before TAIL is reached,
25616 skipping any "non-active" insns - insns that will not actually occupy
25617 an issue slot. Return NULL_RTX if such an insn is not found. */
25620 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
25622 if (insn == NULL_RTX || insn == tail)
25627 insn = NEXT_INSN (insn);
25628 if (insn == NULL_RTX || insn == tail)
25632 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
25633 || (NONJUMP_INSN_P (insn)
25634 && GET_CODE (PATTERN (insn)) != USE
25635 && GET_CODE (PATTERN (insn)) != CLOBBER
25636 && INSN_CODE (insn) != CODE_FOR_stack_tie))
25642 /* Do Power9-specific sched_reorder2 reordering of the ready list. */
25645 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
25650 enum attr_type type, type2;
25652 type = get_attr_type (last_scheduled_insn);
25654 /* Try to issue fixed point divides back-to-back in pairs so they will be
25655 routed to separate execution units and execute in parallel. */
25656 if (type == TYPE_DIV && divide_cnt == 0)
25658 /* First divide has been scheduled. */
25661 /* Scan the ready list looking for another divide; if found, move it
25662 to the end of the list so it is chosen next. */
25666 if (recog_memoized (ready[pos]) >= 0
25667 && get_attr_type (ready[pos]) == TYPE_DIV)
25670 for (i = pos; i < lastpos; i++)
25671 ready[i] = ready[i + 1];
25672 ready[lastpos] = tmp;
25680 /* Last insn was the 2nd divide or not a divide; reset the counter. */
25683 /* The best dispatch throughput for vector and vector load insns can be
25684 achieved by interleaving a vector and vector load such that they'll
25685 dispatch to the same superslice. If this pairing cannot be achieved
25686 then it is best to pair vector insns together and vector load insns together.
25689 To aid in this pairing, vec_pairing maintains the current state with
25690 the following values:
25692 0 : Initial state, no vecload/vector pairing has been started.
25694 1 : A vecload or vector insn has been issued and a candidate for
25695 pairing has been found and moved to the end of the ready list. */
25697 if (type == TYPE_VECLOAD)
25699 /* Issued a vecload. */
25700 if (vec_pairing == 0)
25702 int vecload_pos = -1;
25703 /* We issued a single vecload, look for a vector insn to pair it
25704 with. If one isn't found, try to pair another vecload. */
25708 if (recog_memoized (ready[pos]) >= 0)
25710 type2 = get_attr_type (ready[pos]);
25711 if (is_power9_pairable_vec_type (type2))
25713 /* Found a vector insn to pair with, move it to the
25714 end of the ready list so it is scheduled next. */
25716 for (i = pos; i < lastpos; i++)
25717 ready[i] = ready[i + 1];
25718 ready[lastpos] = tmp;
25720 return cached_can_issue_more;
25722 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
25723 /* Remember position of first vecload seen. */
25728 if (vecload_pos >= 0)
25730 /* Didn't find a vector to pair with but did find a vecload,
25731 move it to the end of the ready list. */
25732 tmp = ready[vecload_pos];
25733 for (i = vecload_pos; i < lastpos; i++)
25734 ready[i] = ready[i + 1];
25735 ready[lastpos] = tmp;
25737 return cached_can_issue_more;
25741 else if (is_power9_pairable_vec_type (type))
25743 /* Issued a vector operation. */
25744 if (vec_pairing == 0)
25747 /* We issued a single vector insn, look for a vecload to pair it
25748 with. If one isn't found, try to pair another vector. */
25752 if (recog_memoized (ready[pos]) >= 0)
25754 type2 = get_attr_type (ready[pos]);
25755 if (type2 == TYPE_VECLOAD)
25757 /* Found a vecload insn to pair with, move it to the
25758 end of the ready list so it is scheduled next. */
25760 for (i = pos; i < lastpos; i++)
25761 ready[i] = ready[i + 1];
25762 ready[lastpos] = tmp;
25764 return cached_can_issue_more;
25766 else if (is_power9_pairable_vec_type (type2) && vec_pos == -1)
25768 /* Remember position of first vector insn seen. */
25775 /* Didn't find a vecload to pair with but did find a vector
25776 insn, move it to the end of the ready list. */
25777 tmp = ready[vec_pos];
25778 for (i = vec_pos; i < lastpos; i++)
25779 ready[i] = ready[i + 1];
25780 ready[lastpos] = tmp;
25782 return cached_can_issue_more;
25787 /* We've either finished a vec/vecload pair, couldn't find an insn to
25788 continue the current pair, or the last insn had nothing to do
25789 with pairing. In any case, reset the state. */
25793 return cached_can_issue_more;
25796 /* We are about to begin issuing insns for this clock cycle. */
25799 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
25800 rtx_insn **ready ATTRIBUTE_UNUSED,
25801 int *pn_ready ATTRIBUTE_UNUSED,
25802 int clock_var ATTRIBUTE_UNUSED)
25804 int n_ready = *pn_ready;
25807 fprintf (dump, "// rs6000_sched_reorder :\n");
25809 /* Reorder the ready list, if the second to last ready insn
25810 is a non-pipelined insn. */
25811 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
25813 if (is_nonpipeline_insn (ready[n_ready - 1])
25814 && (recog_memoized (ready[n_ready - 2]) > 0))
25815 /* Simply swap first two insns. */
25816 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
25819 if (rs6000_tune == PROCESSOR_POWER6)
25820 load_store_pendulum = 0;
25822 return rs6000_issue_rate ();
25825 /* Like rs6000_sched_reorder, but called after issuing each insn. */
25828 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
25829 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
25832 fprintf (dump, "// rs6000_sched_reorder2 :\n");
25834 /* For Power6, we need to handle some special cases to try and keep the
25835 store queue from overflowing and triggering expensive flushes.
25837 This code monitors how load and store instructions are being issued
25838 and skews the ready list one way or the other to increase the likelihood
25839 that a desired instruction is issued at the proper time.
25841 A couple of things are done. First, we maintain a "load_store_pendulum"
25842 to track the current state of load/store issue.
25844 - If the pendulum is at zero, then no loads or stores have been
25845 issued in the current cycle so we do nothing.
25847 - If the pendulum is 1, then a single load has been issued in this
25848 cycle and we attempt to locate another load in the ready list to issue with it.
25851 - If the pendulum is -2, then two stores have already been
25852 issued in this cycle, so we increase the priority of the first load
25853 in the ready list to increase its likelihood of being chosen first in the next cycle.
25856 - If the pendulum is -1, then a single store has been issued in this
25857 cycle and we attempt to locate another store in the ready list to
25858 issue with it, preferring a store to an adjacent memory location to
25859 facilitate store pairing in the store queue.
25861 - If the pendulum is 2, then two loads have already been
25862 issued in this cycle, so we increase the priority of the first store
25863 in the ready list to increase its likelihood of being chosen first in the next cycle.
25866 - If the pendulum < -2 or > 2, then do nothing.
25868 Note: This code covers the most common scenarios. There exist
25869 non-load/store instructions which make use of the LSU and which
25870 would need to be accounted for to strictly model the behavior
25871 of the machine. Those instructions are currently unaccounted
25872 for to help minimize compile time overhead of this code. */
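/* Illustrative trace (sketch): starting from 0, issuing two stores in
   one cycle drives the pendulum to -2; the code below then bumps
   INSN_PRIORITY of the first load on the ready list and pushes the
   pendulum on to -3 so only a single load is boosted, favoring a load
   next cycle and restoring the alternation Power6 prefers. */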
25874 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
25879 rtx load_mem, str_mem;
25881 if (is_store_insn (last_scheduled_insn, &str_mem))
25882 /* Issuing a store, swing the load_store_pendulum to the left */
25883 load_store_pendulum--;
25884 else if (is_load_insn (last_scheduled_insn, &load_mem))
25885 /* Issuing a load, swing the load_store_pendulum to the right */
25886 load_store_pendulum++;
25888 return cached_can_issue_more;
25890 /* If the pendulum is balanced, or there is only one instruction on
25891 the ready list, then all is well, so return. */
25892 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
25893 return cached_can_issue_more;
25895 if (load_store_pendulum == 1)
25897 /* A load has been issued in this cycle. Scan the ready list
25898 for another load to issue with it */
25903 if (is_load_insn (ready[pos], &load_mem))
25905 /* Found a load. Move it to the head of the ready list,
25906 and adjust its priority so that it is more likely to
25907 stay there. */
25908 tmp = ready[pos];
25909 for (i = pos; i < *pn_ready - 1; i++)
25910 ready[i] = ready[i + 1];
25911 ready[*pn_ready-1] = tmp;
25913 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
25914 INSN_PRIORITY (tmp)++;
25920 else if (load_store_pendulum == -2)
25922 /* Two stores have been issued in this cycle. Increase the
25923 priority of the first load in the ready list to favor it for
25924 issuing in the next cycle. */
25929 if (is_load_insn (ready[pos], &load_mem)
25931 && INSN_PRIORITY_KNOWN (ready[pos]))
25933 INSN_PRIORITY (ready[pos])++;
25935 /* Adjust the pendulum to account for the fact that a load
25936 was found and increased in priority. This is to prevent
25937 increasing the priority of multiple loads */
25938 load_store_pendulum--;
25945 else if (load_store_pendulum == -1)
25947 /* A store has been issued in this cycle. Scan the ready list for
25948 another store to issue with it, preferring a store to an adjacent
25950 int first_store_pos = -1;
25956 if (is_store_insn (ready[pos], &str_mem))
25959 /* Maintain the index of the first store found on the ready list. */
25961 if (first_store_pos == -1)
25962 first_store_pos = pos;
25964 if (is_store_insn (last_scheduled_insn, &str_mem2)
25965 && adjacent_mem_locations (str_mem, str_mem2))
25967 /* Found an adjacent store. Move it to the head of the
25968 ready list, and adjust its priority so that it is
25969 more likely to stay there. */
25970 tmp = ready[pos];
25971 for (i = pos; i < *pn_ready - 1; i++)
25972 ready[i] = ready[i + 1];
25973 ready[*pn_ready-1] = tmp;
25975 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
25976 INSN_PRIORITY (tmp)++;
25978 first_store_pos = -1;
25986 if (first_store_pos >= 0)
25988 /* An adjacent store wasn't found, but a non-adjacent store was,
25989 so move the non-adjacent store to the front of the ready
25990 list, and adjust its priority so that it is more likely to stay there. */
25992 tmp = ready[first_store_pos];
25993 for (i = first_store_pos; i < *pn_ready - 1; i++)
25994 ready[i] = ready[i + 1];
25995 ready[*pn_ready-1] = tmp;
25996 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
25997 INSN_PRIORITY (tmp)++;
26000 else if (load_store_pendulum == 2)
26002 /* Two loads have been issued in this cycle. Increase the priority
26003 of the first store in the ready list to favor it for issuing in
26009 if (is_store_insn (ready[pos], &str_mem)
26011 && INSN_PRIORITY_KNOWN (ready[pos]))
26013 INSN_PRIORITY (ready[pos])++;
26015 /* Adjust the pendulum to account for the fact that a store
26016 was found and increased in priority. This is to prevent
26017 increasing the priority of multiple stores */
26018 load_store_pendulum++;
26027 /* Do Power9-dependent reordering if necessary. */
26028 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
26029 && recog_memoized (last_scheduled_insn) >= 0)
26030 return power9_sched_reorder2 (ready, *pn_ready - 1);
26032 return cached_can_issue_more;
26035 /* Return whether the presence of INSN causes a dispatch group termination
26036 of group WHICH_GROUP.
26038 If WHICH_GROUP == current_group, this function will return true if INSN
26039 causes the termination of the current group (i.e., the dispatch group to
26040 which INSN belongs). This means that INSN will be the last insn in the
26041 group it belongs to.
26043 If WHICH_GROUP == previous_group, this function will return true if INSN
26044 causes the termination of the previous group (i.e., the dispatch group that
26045 precedes the group to which INSN belongs). This means that INSN will be
26046 the first insn in the group it belongs to. */
26049 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
26056 first = insn_must_be_first_in_group (insn);
26057 last = insn_must_be_last_in_group (insn);
26062 if (which_group == current_group)
26064 else if (which_group == previous_group)
26072 insn_must_be_first_in_group (rtx_insn *insn)
26074 enum attr_type type;
26078 || DEBUG_INSN_P (insn)
26079 || GET_CODE (PATTERN (insn)) == USE
26080 || GET_CODE (PATTERN (insn)) == CLOBBER)
26083 switch (rs6000_tune)
26085 case PROCESSOR_POWER5:
26086 if (is_cracked_insn (insn))
26089 case PROCESSOR_POWER4:
26090 if (is_microcoded_insn (insn))
26093 if (!rs6000_sched_groups)
26096 type = get_attr_type (insn);
26103 case TYPE_CR_LOGICAL:
26116 case PROCESSOR_POWER6:
26117 type = get_attr_type (insn);
26126 case TYPE_FPCOMPARE:
26137 if (get_attr_dot (insn) == DOT_NO
26138 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
26143 if (get_attr_size (insn) == SIZE_32)
26151 if (get_attr_update (insn) == UPDATE_YES)
26159 case PROCESSOR_POWER7:
26160 type = get_attr_type (insn);
26164 case TYPE_CR_LOGICAL:
26178 if (get_attr_dot (insn) == DOT_YES)
26183 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26184 || get_attr_update (insn) == UPDATE_YES)
26191 if (get_attr_update (insn) == UPDATE_YES)
26199 case PROCESSOR_POWER8:
26200 type = get_attr_type (insn);
26204 case TYPE_CR_LOGICAL:
26212 case TYPE_VECSTORE:
26219 if (get_attr_dot (insn) == DOT_YES)
26224 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26225 || get_attr_update (insn) == UPDATE_YES)
26230 if (get_attr_update (insn) == UPDATE_YES
26231 && get_attr_indexed (insn) == INDEXED_YES)
26247 insn_must_be_last_in_group (rtx_insn *insn)
26249 enum attr_type type;
26253 || DEBUG_INSN_P (insn)
26254 || GET_CODE (PATTERN (insn)) == USE
26255 || GET_CODE (PATTERN (insn)) == CLOBBER)
26258 switch (rs6000_tune) {
26259 case PROCESSOR_POWER4:
26260 case PROCESSOR_POWER5:
26261 if (is_microcoded_insn (insn))
26264 if (is_branch_slot_insn (insn))
26268 case PROCESSOR_POWER6:
26269 type = get_attr_type (insn);
26277 case TYPE_FPCOMPARE:
26288 if (get_attr_dot (insn) == DOT_NO
26289 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
26294 if (get_attr_size (insn) == SIZE_32)
26302 case PROCESSOR_POWER7:
26303 type = get_attr_type (insn);
26313 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26314 && get_attr_update (insn) == UPDATE_YES)
26319 if (get_attr_update (insn) == UPDATE_YES
26320 && get_attr_indexed (insn) == INDEXED_YES)
26328 case PROCESSOR_POWER8:
26329 type = get_attr_type (insn);
26341 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26342 && get_attr_update (insn) == UPDATE_YES)
26347 if (get_attr_update (insn) == UPDATE_YES
26348 && get_attr_indexed (insn) == INDEXED_YES)
26363 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
26364 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
26367 is_costly_group (rtx *group_insns, rtx next_insn)
26370 int issue_rate = rs6000_issue_rate ();
26372 for (i = 0; i < issue_rate; i++)
26374 sd_iterator_def sd_it;
26376 rtx insn = group_insns[i];
26381 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
26383 rtx next = DEP_CON (dep);
26385 if (next == next_insn
26386 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
26394 /* Helper for the function redefine_groups.
26395 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
26396 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
26397 to keep it "far" (in a separate group) from GROUP_INSNS, according to
26398 one of the following schemes, depending on the value of the flag
26399 -minsert_sched_nops = X:
26400 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
26401 in order to force NEXT_INSN into a separate group.
26402 (2) X < sched_finish_regroup_exact: insert exactly X nops.
26403 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
26404 insertion (has a group just ended, how many vacant issue slots remain in the
26405 last group, and how many dispatch groups were encountered so far). */
26408 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
26409 rtx_insn *next_insn, bool *group_end, int can_issue_more,
26414 int issue_rate = rs6000_issue_rate ();
26415 bool end = *group_end;
26418 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
26419 return can_issue_more;
26421 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
26422 return can_issue_more;
26424 force = is_costly_group (group_insns, next_insn);
26426 return can_issue_more;
26428 if (sched_verbose > 6)
26429 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
26430 *group_count, can_issue_more);
26432 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
26435 can_issue_more = 0;
26437 /* Since only a branch can be issued in the last issue_slot, it is
26438 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
26439 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
26440 in this case the last nop will start a new group and the branch
26441 will be forced to the new group. */
26442 if (can_issue_more && !is_branch_slot_insn (next_insn))
26445 /* Do we have a special group ending nop? */
26446 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
26447 || rs6000_tune == PROCESSOR_POWER8)
26449 nop = gen_group_ending_nop ();
26450 emit_insn_before (nop, next_insn);
26451 can_issue_more = 0;
26454 while (can_issue_more > 0)
26457 emit_insn_before (nop, next_insn);
26465 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
26467 int n_nops = rs6000_sched_insert_nops;
26469 /* Nops can't be issued from the branch slot, so the effective
26470 issue_rate for nops is 'issue_rate - 1'. */
26471 if (can_issue_more == 0)
26472 can_issue_more = issue_rate;
26474 if (can_issue_more == 0)
26476 can_issue_more = issue_rate - 1;
26479 for (i = 0; i < issue_rate; i++)
26481 group_insns[i] = 0;
26488 emit_insn_before (nop, next_insn);
26489 if (can_issue_more == issue_rate - 1) /* new group begins */
26492 if (can_issue_more == 0)
26494 can_issue_more = issue_rate - 1;
26497 for (i = 0; i < issue_rate; i++)
26499 group_insns[i] = 0;
26505 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
26508 /* Is next_insn going to start a new group? */
26511 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
26512 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
26513 || (can_issue_more < issue_rate &&
26514 insn_terminates_group_p (next_insn, previous_group)));
26515 if (*group_end && end)
26518 if (sched_verbose > 6)
26519 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
26520 *group_count, can_issue_more);
26521 return can_issue_more;
26524 return can_issue_more;
26527 /* This function tries to synch the dispatch groups that the compiler "sees"
26528 with the dispatch groups that the processor dispatcher is expected to
26529 form in practice. It tries to achieve this synchronization by forcing the
26530 estimated processor grouping on the compiler (as opposed to the function
26531 'pad_groups' which tries to force the scheduler's grouping on the processor).
26533 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
26534 examines the (estimated) dispatch groups that will be formed by the processor
26535 dispatcher. It marks these group boundaries to reflect the estimated
26536 processor grouping, overriding the grouping that the scheduler had marked.
26537 Depending on the value of the flag '-minsert-sched-nops' this function can
26538 force certain insns into separate groups or force a certain distance between
26539 them by inserting nops, for example, if there exists a "costly dependence" between the insns.
26542 The function estimates the group boundaries that the processor will form as
26543 follows: It keeps track of how many vacant issue slots are available after
26544 each insn. A subsequent insn will start a new group if one of the following
26546 - no more vacant issue slots remain in the current dispatch group.
26547 - only the last issue slot, which is the branch slot, is vacant, but the next
26548 insn is not a branch.
26549 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
26550 which means that a cracked insn (which occupies two issue slots) can't be
26551 issued in this group.
26552 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
26553 start a new group. */
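/* Concrete illustration (sketch, assuming an issue rate of 5 with the
   fifth slot reserved for branches): after four non-branch insns only
   the branch slot is vacant, so a following non-branch insn starts a
   new group; with two or fewer slots left (one being the branch
   slot), a cracked insn cannot fit its two slots and likewise spills
   into a new group. */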
26556 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
26559 rtx_insn *insn, *next_insn;
26561 int can_issue_more;
26564 int group_count = 0;
26568 issue_rate = rs6000_issue_rate ();
26569 group_insns = XALLOCAVEC (rtx, issue_rate);
26570 for (i = 0; i < issue_rate; i++)
26572 group_insns[i] = 0;
26574 can_issue_more = issue_rate;
26576 insn = get_next_active_insn (prev_head_insn, tail);
26579 while (insn != NULL_RTX)
26581 slot = (issue_rate - can_issue_more);
26582 group_insns[slot] = insn;
26584 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
26585 if (insn_terminates_group_p (insn, current_group))
26586 can_issue_more = 0;
26588 next_insn = get_next_active_insn (insn, tail);
26589 if (next_insn == NULL_RTX)
26590 return group_count + 1;
26592 /* Is next_insn going to start a new group? */
26594 = (can_issue_more == 0
26595 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
26596 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
26597 || (can_issue_more < issue_rate &&
26598 insn_terminates_group_p (next_insn, previous_group)));
26600 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
26601 next_insn, &group_end, can_issue_more,
26607 can_issue_more = 0;
26608 for (i = 0; i < issue_rate; i++)
26610 group_insns[i] = 0;
26614 if (GET_MODE (next_insn) == TImode && can_issue_more)
26615 PUT_MODE (next_insn, VOIDmode);
26616 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
26617 PUT_MODE (next_insn, TImode);
26620 if (can_issue_more == 0)
26621 can_issue_more = issue_rate;
26624 return group_count;
26627 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
26628 dispatch group boundaries that the scheduler had marked. Pad with nops
26629 any dispatch groups which have vacant issue slots, in order to force the
26630 scheduler's grouping on the processor dispatcher. The function
26631 returns the number of dispatch groups found. */
26634 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
26637 rtx_insn *insn, *next_insn;
26640 int can_issue_more;
26642 int group_count = 0;
26644 /* Initialize issue_rate. */
26645 issue_rate = rs6000_issue_rate ();
26646 can_issue_more = issue_rate;
26648 insn = get_next_active_insn (prev_head_insn, tail);
26649 next_insn = get_next_active_insn (insn, tail);
26651 while (insn != NULL_RTX)
26654 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
26656 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
26658 if (next_insn == NULL_RTX)
26663 /* If the scheduler had marked group termination at this location
26664 (between insn and next_insn), and neither insn nor next_insn will
26665 force group termination, pad the group with nops to force group termination. */
26668 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
26669 && !insn_terminates_group_p (insn, current_group)
26670 && !insn_terminates_group_p (next_insn, previous_group))
26672 if (!is_branch_slot_insn (next_insn))
26675 while (can_issue_more)
26678 emit_insn_before (nop, next_insn);
26683 can_issue_more = issue_rate;
26688 next_insn = get_next_active_insn (insn, tail);
26691 return group_count;
26694 /* We're beginning a new block. Initialize data structures as necessary. */
26697 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
26698 int sched_verbose ATTRIBUTE_UNUSED,
26699 int max_ready ATTRIBUTE_UNUSED)
26701 last_scheduled_insn = NULL;
26702 load_store_pendulum = 0;
26707 /* The following function is called at the end of scheduling BB.
26708 After reload, it inserts nops to enforce insn group bundling. */
26711 rs6000_sched_finish (FILE *dump, int sched_verbose)
26716 fprintf (dump, "=== Finishing schedule.\n");
26718 if (reload_completed && rs6000_sched_groups)
26720 /* Do not run sched_finish hook when selective scheduling enabled. */
26721 if (sel_sched_p ())
26724 if (rs6000_sched_insert_nops == sched_finish_none)
26727 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
26728 n_groups = pad_groups (dump, sched_verbose,
26729 current_sched_info->prev_head,
26730 current_sched_info->next_tail);
26732 n_groups = redefine_groups (dump, sched_verbose,
26733 current_sched_info->prev_head,
26734 current_sched_info->next_tail);
26736 if (sched_verbose >= 6)
26738 fprintf (dump, "ngroups = %d\n", n_groups);
26739 print_rtl (dump, current_sched_info->prev_head);
26740 fprintf (dump, "Done finish_sched\n");
26745 struct rs6000_sched_context
26747 short cached_can_issue_more;
26748 rtx_insn *last_scheduled_insn;
26749 int load_store_pendulum;
26754 typedef struct rs6000_sched_context rs6000_sched_context_def;
26755 typedef rs6000_sched_context_def *rs6000_sched_context_t;
26757 /* Allocate storage for a new scheduling context. */
26759 rs6000_alloc_sched_context (void)
26761 return xmalloc (sizeof (rs6000_sched_context_def));
26764 /* If CLEAN_P is true then initialize _SC with clean data;
26765 otherwise initialize it from the global context. */
26767 rs6000_init_sched_context (void *_sc, bool clean_p)
26769 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
26773 sc->cached_can_issue_more = 0;
26774 sc->last_scheduled_insn = NULL;
26775 sc->load_store_pendulum = 0;
26776 sc->divide_cnt = 0;
26777 sc->vec_pairing = 0;
26781 sc->cached_can_issue_more = cached_can_issue_more;
26782 sc->last_scheduled_insn = last_scheduled_insn;
26783 sc->load_store_pendulum = load_store_pendulum;
26784 sc->divide_cnt = divide_cnt;
26785 sc->vec_pairing = vec_pairing;
26789 /* Sets the global scheduling context to the one pointed to by _SC. */
26791 rs6000_set_sched_context (void *_sc)
26793 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
26795 gcc_assert (sc != NULL);
26797 cached_can_issue_more = sc->cached_can_issue_more;
26798 last_scheduled_insn = sc->last_scheduled_insn;
26799 load_store_pendulum = sc->load_store_pendulum;
26800 divide_cnt = sc->divide_cnt;
26801 vec_pairing = sc->vec_pairing;
26806 rs6000_free_sched_context (void *_sc)
26808 gcc_assert (_sc != NULL);
26814 rs6000_sched_can_speculate_insn (rtx_insn *insn)
26816 switch (get_attr_type (insn))
26831 /* Length in units of the trampoline for entering a nested function. */
26834 rs6000_trampoline_size (void)
26838 switch (DEFAULT_ABI)
26841 gcc_unreachable ();
26844 ret = (TARGET_32BIT) ? 12 : 24;
26848 gcc_assert (!TARGET_32BIT);
26854 ret = (TARGET_32BIT) ? 40 : 48;
26861 /* Emit RTL insns to initialize the variable parts of a trampoline.
26862 FNADDR is an RTX for the address of the function's pure code.
26863 CXT is an RTX for the static chain value for the function. */
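/* Illustrative sketch (not from the original source): for the AIX ABI the
   code below fills the trampoline in as a 3-word function descriptor,
   each word REGSIZE (4 or 8) bytes wide:

       word 0: code entry address (copied from FNADDR's descriptor)
       word 1: TOC pointer        (copied from FNADDR's descriptor)
       word 2: static chain       (CXT)

   The V.4/eabi/Darwin case instead hands everything to the
   __trampoline_setup library routine.  */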
26866 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
26868 int regsize = (TARGET_32BIT) ? 4 : 8;
26869 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
26870 rtx ctx_reg = force_reg (Pmode, cxt);
26871 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
26873 switch (DEFAULT_ABI)
26876 gcc_unreachable ();
26878 /* Under AIX, just build the 3-word function descriptor. */
26881 rtx fnmem, fn_reg, toc_reg;
26883 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
26884 error ("you cannot take the address of a nested function if you use "
26885 "the %qs option", "-mno-pointers-to-nested-functions");
26887 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
26888 fn_reg = gen_reg_rtx (Pmode);
26889 toc_reg = gen_reg_rtx (Pmode);
26891 /* Macro to shorten the code expansions below. */
26892 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
26894 m_tramp = replace_equiv_address (m_tramp, addr);
26896 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
26897 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
26898 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
26899 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
26900 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
26906 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
26910 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
26911 LCT_NORMAL, VOIDmode,
26913 GEN_INT (rs6000_trampoline_size ()), SImode,
26921 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
26922 identifier as an argument, so the front end shouldn't look it up. */
26925 rs6000_attribute_takes_identifier_p (const_tree attr_id)
26927 return is_attribute_p ("altivec", attr_id);
26930 /* Handle the "altivec" attribute. The attribute may have
26931 arguments as follows:
26933 __attribute__((altivec(vector__)))
26934 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
26935 __attribute__((altivec(bool__))) (always followed by 'unsigned')
26937 and may appear more than once (e.g., 'vector bool char') in a
26938 given declaration. */
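/* For example (illustrative; the keyword-to-attribute mapping is an
   assumption based on the forms listed above), the AltiVec type
   keywords correspond to:

       vector unsigned int -> __attribute__((altivec(vector__))) unsigned int
       vector bool char    -> __attribute__((altivec(bool__))) unsigned char
       vector pixel        -> __attribute__((altivec(pixel__))) unsigned short  */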
26941 rs6000_handle_altivec_attribute (tree *node,
26942 tree name ATTRIBUTE_UNUSED,
26944 int flags ATTRIBUTE_UNUSED,
26945 bool *no_add_attrs)
26947 tree type = *node, result = NULL_TREE;
26951 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
26952 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
26953 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
26956 while (POINTER_TYPE_P (type)
26957 || TREE_CODE (type) == FUNCTION_TYPE
26958 || TREE_CODE (type) == METHOD_TYPE
26959 || TREE_CODE (type) == ARRAY_TYPE)
26960 type = TREE_TYPE (type);
26962 mode = TYPE_MODE (type);
26964 /* Check for invalid AltiVec type qualifiers. */
26965 if (type == long_double_type_node)
26966 error ("use of %<long double%> in AltiVec types is invalid");
26967 else if (type == boolean_type_node)
26968 error ("use of boolean types in AltiVec types is invalid");
26969 else if (TREE_CODE (type) == COMPLEX_TYPE)
26970 error ("use of %<complex%> in AltiVec types is invalid");
26971 else if (DECIMAL_FLOAT_MODE_P (mode))
26972 error ("use of decimal floating point types in AltiVec types is invalid");
26973 else if (!TARGET_VSX)
26975 if (type == long_unsigned_type_node || type == long_integer_type_node)
26978 error ("use of %<long%> in AltiVec types is invalid for "
26979 "64-bit code without %qs", "-mvsx");
26980 else if (rs6000_warn_altivec_long)
26981 warning (0, "use of %<long%> in AltiVec types is deprecated; "
26984 else if (type == long_long_unsigned_type_node
26985 || type == long_long_integer_type_node)
26986 error ("use of %<long long%> in AltiVec types is invalid without %qs",
26988 else if (type == double_type_node)
26989 error ("use of %<double%> in AltiVec types is invalid without %qs",
26993 switch (altivec_type)
26996 unsigned_p = TYPE_UNSIGNED (type);
27000 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
27003 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
27006 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
27009 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
27012 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
27014 case E_SFmode: result = V4SF_type_node; break;
27015 case E_DFmode: result = V2DF_type_node; break;
27016 /* If the user says 'vector int bool', we may be handed the 'bool'
27017 attribute _before_ the 'vector' attribute, and so select the
27018 proper type in the 'b' case below. */
27019 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
27020 case E_V2DImode: case E_V2DFmode:
27028 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
27029 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
27030 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
27031 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
27038 case E_V8HImode: result = pixel_V8HI_type_node;
27044 /* Propagate qualifiers attached to the element type
27045 onto the vector type. */
27046 if (result && result != type && TYPE_QUALS (type))
27047 result = build_qualified_type (result, TYPE_QUALS (type));
27049 *no_add_attrs = true; /* No need to hang on to the attribute. */
27052 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
27057 /* AltiVec defines five built-in scalar types that serve as vector
27058 elements; we must teach the compiler how to mangle them. The 128-bit
27059 floating point mangling is target-specific as well. */
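/* Illustrative example (the Dv4_ vector prefix is the generic Itanium
   ABI vendor-extension form and is an assumption here): a declaration

       void f (vector bool int);

   mangles the element type via the table below as "U6__booli", giving
   a symbol along the lines of _Z1fDv4_U6__booli.  */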
27061 static const char *
27062 rs6000_mangle_type (const_tree type)
27064 type = TYPE_MAIN_VARIANT (type);
27066 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
27067 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
27070 if (type == bool_char_type_node) return "U6__boolc";
27071 if (type == bool_short_type_node) return "U6__bools";
27072 if (type == pixel_type_node) return "u7__pixel";
27073 if (type == bool_int_type_node) return "U6__booli";
27074 if (type == bool_long_long_type_node) return "U6__boolx";
27076 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
27078 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
27079 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
27081 /* For all other types, use the default mangling. */
27085 /* Handle a "longcall" or "shortcall" attribute; arguments as in
27086 struct attribute_spec.handler. */
27089 rs6000_handle_longcall_attribute (tree *node, tree name,
27090 tree args ATTRIBUTE_UNUSED,
27091 int flags ATTRIBUTE_UNUSED,
27092 bool *no_add_attrs)
27094 if (TREE_CODE (*node) != FUNCTION_TYPE
27095 && TREE_CODE (*node) != FIELD_DECL
27096 && TREE_CODE (*node) != TYPE_DECL)
27098 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27100 *no_add_attrs = true;
27106 /* Set longcall attributes on all functions declared when
27107 rs6000_default_long_calls is true. */
27109 rs6000_set_default_type_attributes (tree type)
27111 if (rs6000_default_long_calls
27112 && (TREE_CODE (type) == FUNCTION_TYPE
27113 || TREE_CODE (type) == METHOD_TYPE))
27114 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
27116 TYPE_ATTRIBUTES (type));
27119 darwin_set_default_type_attributes (type);
27123 /* Return a reference suitable for calling a function with the
27124 longcall attribute. */
27127 rs6000_longcall_ref (rtx call_ref, rtx arg)
27129 /* System V adds '.' to the internal name, so skip them. */
27130 const char *call_name = XSTR (call_ref, 0);
27131 if (*call_name == '.')
27133 while (*call_name == '.')
27136 tree node = get_identifier (call_name);
27137 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
27142 rtx base = const0_rtx;
27144 if (rs6000_pcrel_p (cfun))
27146 rtx reg = gen_rtx_REG (Pmode, regno);
27147 rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
27149 emit_insn (gen_rtx_SET (reg, u));
27153 if (DEFAULT_ABI == ABI_ELFv2)
27154 base = gen_rtx_REG (Pmode, TOC_REGISTER);
27158 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
27161 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
27162 may be used by a function global entry point. For SysV4, r11
27163 is used by __glink_PLTresolve lazy resolver entry. */
27164 rtx reg = gen_rtx_REG (Pmode, regno);
27165 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
27167 rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
27169 emit_insn (gen_rtx_SET (reg, hi));
27170 emit_insn (gen_rtx_SET (reg, lo));
27174 return force_reg (Pmode, call_ref);
27177 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
27178 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
27181 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27182 struct attribute_spec.handler. */
27184 rs6000_handle_struct_attribute (tree *node, tree name,
27185 tree args ATTRIBUTE_UNUSED,
27186 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27189 if (DECL_P (*node))
27191 if (TREE_CODE (*node) == TYPE_DECL)
27192 type = &TREE_TYPE (*node);
27197 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27198 || TREE_CODE (*type) == UNION_TYPE)))
27200 warning (OPT_Wattributes, "%qE attribute ignored", name);
27201 *no_add_attrs = true;
27204 else if ((is_attribute_p ("ms_struct", name)
27205 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27206 || ((is_attribute_p ("gcc_struct", name)
27207 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27209 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27211 *no_add_attrs = true;
27218 rs6000_ms_bitfield_layout_p (const_tree record_type)
27220 return (TARGET_USE_MS_BITFIELD_LAYOUT
27221 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27222 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27225 #ifdef USING_ELFOS_H
27227 /* A get_unnamed_section callback, used for switching to toc_section. */
27230 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
27232 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27233 && TARGET_MINIMAL_TOC)
27235 if (!toc_initialized)
27237 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
27238 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27239 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
27240 fprintf (asm_out_file, "\t.tc ");
27241 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
27242 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
27243 fprintf (asm_out_file, "\n");
27245 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27246 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27247 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
27248 fprintf (asm_out_file, " = .+32768\n");
27249 toc_initialized = 1;
27252 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27254 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27256 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
27257 if (!toc_initialized)
27259 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27260 toc_initialized = 1;
27265 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27266 if (!toc_initialized)
27268 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27269 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
27270 fprintf (asm_out_file, " = .+32768\n");
27271 toc_initialized = 1;
27276 /* Implement TARGET_ASM_INIT_SECTIONS. */
27279 rs6000_elf_asm_init_sections (void)
27282 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
27285 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
27286 SDATA2_SECTION_ASM_OP);
27289 /* Implement TARGET_SELECT_RTX_SECTION. */
27292 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
27293 unsigned HOST_WIDE_INT align)
27295 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
27296 return toc_section;
27298 return default_elf_select_rtx_section (mode, x, align);
27301 /* For a SYMBOL_REF, set generic flags and then perform some
27302 target-specific processing.
27304 When the AIX ABI is requested on a non-AIX system, replace the
27305 function name with the real name (with a leading .) rather than the
27306 function descriptor name. This saves a lot of overriding code to
27307 read the prefixes. */
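/* E.g. (illustrative): a FUNCTION_DECL named "foo" has its SYMBOL_REF
   renamed to ".foo" here, so calls resolve to the code entry point
   rather than the function descriptor.  */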
27309 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
27311 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
27313 default_encode_section_info (decl, rtl, first);
27316 && TREE_CODE (decl) == FUNCTION_DECL
27318 && DEFAULT_ABI == ABI_AIX)
27320 rtx sym_ref = XEXP (rtl, 0);
27321 size_t len = strlen (XSTR (sym_ref, 0));
27322 char *str = XALLOCAVEC (char, len + 2);
27324 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
27325 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
27330 compare_section_name (const char *section, const char *templ)
27334 len = strlen (templ);
27335 return (strncmp (section, templ, len) == 0
27336 && (section[len] == 0 || section[len] == '.'));
27340 rs6000_elf_in_small_data_p (const_tree decl)
27342 if (rs6000_sdata == SDATA_NONE)
27345 /* We want to merge strings, so we never consider them small data. */
27346 if (TREE_CODE (decl) == STRING_CST)
27349 /* Functions are never in the small data area. */
27350 if (TREE_CODE (decl) == FUNCTION_DECL)
27353 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
27355 const char *section = DECL_SECTION_NAME (decl);
27356 if (compare_section_name (section, ".sdata")
27357 || compare_section_name (section, ".sdata2")
27358 || compare_section_name (section, ".gnu.linkonce.s")
27359 || compare_section_name (section, ".sbss")
27360 || compare_section_name (section, ".sbss2")
27361 || compare_section_name (section, ".gnu.linkonce.sb")
27362 || strcmp (section, ".PPC.EMB.sdata0") == 0
27363 || strcmp (section, ".PPC.EMB.sbss0") == 0)
27368 /* If we are told not to put readonly data in sdata, then don't. */
27369 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
27370 && !rs6000_readonly_in_sdata)
27373 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
27376 && size <= g_switch_value
27377 /* If it's not public, and we're not going to reference it there,
27378 there's no need to put it in the small data section. */
27379 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
27386 #endif /* USING_ELFOS_H */
27388 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
27391 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
27393 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
27396 /* Do not place thread-local symbol refs in the object blocks. */
27399 rs6000_use_blocks_for_decl_p (const_tree decl)
27401 return !DECL_THREAD_LOCAL_P (decl);
27404 /* Return a REG that occurs in ADDR with coefficient 1.
27405 ADDR can be effectively incremented by incrementing REG.
27407 r0 is special and we must not select it as an address
27408 register by this routine since our caller will try to
27409 increment the returned register via an "la" instruction. */
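/* For example (illustrative): given ADDR of the form

       (plus (reg r9) (const_int 8))

   this returns r9, since adding to r9 effectively increments ADDR.  */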
27412 find_addr_reg (rtx addr)
27414 while (GET_CODE (addr) == PLUS)
27416 if (REG_P (XEXP (addr, 0))
27417 && REGNO (XEXP (addr, 0)) != 0)
27418 addr = XEXP (addr, 0);
27419 else if (REG_P (XEXP (addr, 1))
27420 && REGNO (XEXP (addr, 1)) != 0)
27421 addr = XEXP (addr, 1);
27422 else if (CONSTANT_P (XEXP (addr, 0)))
27423 addr = XEXP (addr, 1);
27424 else if (CONSTANT_P (XEXP (addr, 1)))
27425 addr = XEXP (addr, 0);
27427 gcc_unreachable ();
27429 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
27434 rs6000_fatal_bad_address (rtx op)
27436 fatal_insn ("bad address", op);
27441 typedef struct branch_island_d {
27442 tree function_name;
27448 static vec<branch_island, va_gc> *branch_islands;
27450 /* Remember to generate a branch island for far calls to the given
27451 function. */
27454 add_compiler_branch_island (tree label_name, tree function_name,
27457 branch_island bi = {function_name, label_name, line_number};
27458 vec_safe_push (branch_islands, bi);
27461 /* Generate far-jump branch islands for everything recorded in
27462 branch_islands. Invoked immediately after the last instruction of
27463 the epilogue has been emitted; the branch islands must be appended
27464 to, and contiguous with, the function body. Mach-O stubs are
27465 generated in machopic_output_stub(). */
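/* A non-PIC branch island built below comes out roughly as follows
   (illustrative; "L42$island" and "_target" are made-up names):

       L42$island:
               lis r12,hi16(_target)
               ori r12,r12,lo16(_target)
               mtctr r12
               bctr

   i.e. materialize the full target address in r12 and branch via CTR,
   escaping the +/-32MB range of a direct branch.  */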
27468 macho_branch_islands (void)
27472 while (!vec_safe_is_empty (branch_islands))
27474 branch_island *bi = &branch_islands->last ();
27475 const char *label = IDENTIFIER_POINTER (bi->label_name);
27476 const char *name = IDENTIFIER_POINTER (bi->function_name);
27477 char name_buf[512];
27478 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
27479 if (name[0] == '*' || name[0] == '&')
27480 strcpy (name_buf, name+1);
27484 strcpy (name_buf+1, name);
27486 strcpy (tmp_buf, "\n");
27487 strcat (tmp_buf, label);
27488 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
27489 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
27490 dbxout_stabd (N_SLINE, bi->line_number);
27491 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
27494 if (TARGET_LINK_STACK)
27497 get_ppc476_thunk_name (name);
27498 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
27499 strcat (tmp_buf, name);
27500 strcat (tmp_buf, "\n");
27501 strcat (tmp_buf, label);
27502 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
27506 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
27507 strcat (tmp_buf, label);
27508 strcat (tmp_buf, "_pic\n");
27509 strcat (tmp_buf, label);
27510 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
27513 strcat (tmp_buf, "\taddis r11,r11,ha16(");
27514 strcat (tmp_buf, name_buf);
27515 strcat (tmp_buf, " - ");
27516 strcat (tmp_buf, label);
27517 strcat (tmp_buf, "_pic)\n");
27519 strcat (tmp_buf, "\tmtlr r0\n");
27521 strcat (tmp_buf, "\taddi r12,r11,lo16(");
27522 strcat (tmp_buf, name_buf);
27523 strcat (tmp_buf, " - ");
27524 strcat (tmp_buf, label);
27525 strcat (tmp_buf, "_pic)\n");
27527 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
27531 strcat (tmp_buf, ":\n\tlis r12,hi16(");
27532 strcat (tmp_buf, name_buf);
27533 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
27534 strcat (tmp_buf, name_buf);
27535 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
27537 output_asm_insn (tmp_buf, 0);
27538 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
27539 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
27540 dbxout_stabd (N_SLINE, bi->line_number);
27541 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
27542 branch_islands->pop ();
27546 /* NO_PREVIOUS_DEF checks whether the function name is already in the
27547 branch-island list. */
27550 no_previous_def (tree function_name)
27555 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
27556 if (function_name == bi->function_name)
27561 /* GET_PREV_LABEL gets the label name from the previous definition of
27562 the function. */
27565 get_prev_label (tree function_name)
27570 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
27571 if (function_name == bi->function_name)
27572 return bi->label_name;
27576 /* Generate PIC and indirect symbol stubs. */
27579 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27581 unsigned int length;
27582 char *symbol_name, *lazy_ptr_name;
27583 char *local_label_0;
27584 static unsigned label = 0;
27586 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27587 symb = (*targetm.strip_name_encoding) (symb);
27590 length = strlen (symb);
27591 symbol_name = XALLOCAVEC (char, length + 32);
27592 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27594 lazy_ptr_name = XALLOCAVEC (char, length + 32);
27595 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
27598 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
27600 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
27604 fprintf (file, "\t.align 5\n");
27606 fprintf (file, "%s:\n", stub);
27607 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27610 local_label_0 = XALLOCAVEC (char, 16);
27611 sprintf (local_label_0, "L%u$spb", label);
27613 fprintf (file, "\tmflr r0\n");
27614 if (TARGET_LINK_STACK)
27617 get_ppc476_thunk_name (name);
27618 fprintf (file, "\tbl %s\n", name);
27619 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
27623 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
27624 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
27626 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
27627 lazy_ptr_name, local_label_0);
27628 fprintf (file, "\tmtlr r0\n");
27629 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
27630 (TARGET_64BIT ? "ldu" : "lwzu"),
27631 lazy_ptr_name, local_label_0);
27632 fprintf (file, "\tmtctr r12\n");
27633 fprintf (file, "\tbctr\n");
27637 fprintf (file, "\t.align 4\n");
27639 fprintf (file, "%s:\n", stub);
27640 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27642 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
27643 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
27644 (TARGET_64BIT ? "ldu" : "lwzu"),
27646 fprintf (file, "\tmtctr r12\n");
27647 fprintf (file, "\tbctr\n");
27650 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27651 fprintf (file, "%s:\n", lazy_ptr_name);
27652 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27653 fprintf (file, "%sdyld_stub_binding_helper\n",
27654 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
27657 /* Legitimize PIC addresses. If the address is already
27658 position-independent, we return ORIG. Newly generated
27659 position-independent addresses go into a reg. This is REG if nonzero,
27660 otherwise we allocate register(s) as necessary. */
27662 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
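/* Worked values (illustrative): SMALL_INT holds exactly for the signed
   16-bit range.  0x7fff + 0x8000 == 0xffff < 0x10000 passes, and the
   unsigned wraparound maps -0x8000 to 0 < 0x10000, while
   0x8000 + 0x8000 == 0x10000 fails.  */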
27665 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
27670 if (reg == NULL && !reload_completed)
27671 reg = gen_reg_rtx (Pmode);
27673 if (GET_CODE (orig) == CONST)
27677 if (GET_CODE (XEXP (orig, 0)) == PLUS
27678 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
27681 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
27683 /* Use a different reg for the intermediate value, as
27684 it will be marked UNCHANGING. */
27685 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
27686 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
27689 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
27692 if (CONST_INT_P (offset))
27694 if (SMALL_INT (offset))
27695 return plus_constant (Pmode, base, INTVAL (offset));
27696 else if (!reload_completed)
27697 offset = force_reg (Pmode, offset);
27700 rtx mem = force_const_mem (Pmode, orig);
27701 return machopic_legitimize_pic_address (mem, Pmode, reg);
27704 return gen_rtx_PLUS (Pmode, base, offset);
27707 /* Fall back on generic machopic code. */
27708 return machopic_legitimize_pic_address (orig, mode, reg);
27711 /* Output a .machine directive for the Darwin assembler, and call
27712 the generic start_file routine. */
27715 rs6000_darwin_file_start (void)
27717 static const struct
27721 HOST_WIDE_INT if_set;
27723 { "ppc64", "ppc64", MASK_64BIT },
27724 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
27725 { "power4", "ppc970", 0 },
27726 { "G5", "ppc970", 0 },
27727 { "7450", "ppc7450", 0 },
27728 { "7400", "ppc7400", MASK_ALTIVEC },
27729 { "G4", "ppc7400", 0 },
27730 { "750", "ppc750", 0 },
27731 { "740", "ppc750", 0 },
27732 { "G3", "ppc750", 0 },
27733 { "604e", "ppc604e", 0 },
27734 { "604", "ppc604", 0 },
27735 { "603e", "ppc603", 0 },
27736 { "603", "ppc603", 0 },
27737 { "601", "ppc601", 0 },
27738 { NULL, "ppc", 0 } };
27739 const char *cpu_id = "";
27742 rs6000_file_start ();
27743 darwin_file_start ();
27745 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
27747 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
27748 cpu_id = rs6000_default_cpu;
27750 if (global_options_set.x_rs6000_cpu_index)
27751 cpu_id = processor_target_table[rs6000_cpu_index].name;
27753 /* Look through the mapping array. Pick the first name that either
27754 matches the argument, has a bit set in IF_SET that is also set
27755 in the target flags, or has a NULL name. */
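/* For example (illustrative): -mcpu=G5 selects "ppc970" (via the "G5"
   entry, or earlier via the "970" entry's if_set bits) and emits
   "\t.machine ppc970"; an unrecognized CPU falls through to the
   terminating { NULL, "ppc", 0 } entry.  */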
27758 while (mapping[i].arg != NULL
27759 && strcmp (mapping[i].arg, cpu_id) != 0
27760 && (mapping[i].if_set & rs6000_isa_flags) == 0)
27763 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
27766 #endif /* TARGET_MACHO */
27770 rs6000_elf_reloc_rw_mask (void)
27774 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27780 /* Record an element in the table of global constructors. SYMBOL is
27781 a SYMBOL_REF of the function to be called; PRIORITY is a number
27782 between 0 and MAX_INIT_PRIORITY.
27784 This differs from default_named_section_asm_out_constructor in
27785 that we have special handling for -mrelocatable. */
27787 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
27789 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
27791 const char *section = ".ctors";
27794 if (priority != DEFAULT_INIT_PRIORITY)
27796 sprintf (buf, ".ctors.%.5u",
27797 /* Invert the numbering so the linker puts us in the proper
27798 order; constructors are run from right to left, and the
27799 linker sorts in increasing order. */
27800 MAX_INIT_PRIORITY - priority);
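/* E.g. (illustrative, with MAX_INIT_PRIORITY == 65535): a constructor
   of priority 101 lands in ".ctors.65434", so the linker's increasing
   sort produces the right-to-left execution order described above.  */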
27804 switch_to_section (get_section (section, SECTION_WRITE, NULL));
27805 assemble_align (POINTER_SIZE);
27807 if (DEFAULT_ABI == ABI_V4
27808 && (TARGET_RELOCATABLE || flag_pic > 1))
27810 fputs ("\t.long (", asm_out_file);
27811 output_addr_const (asm_out_file, symbol);
27812 fputs (")@fixup\n", asm_out_file);
27815 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
27818 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
27820 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
27822 const char *section = ".dtors";
27825 if (priority != DEFAULT_INIT_PRIORITY)
27827 sprintf (buf, ".dtors.%.5u",
27828 /* Invert the numbering so the linker puts us in the proper
27829 order; constructors are run from right to left, and the
27830 linker sorts in increasing order. */
27831 MAX_INIT_PRIORITY - priority);
27835 switch_to_section (get_section (section, SECTION_WRITE, NULL));
27836 assemble_align (POINTER_SIZE);
27838 if (DEFAULT_ABI == ABI_V4
27839 && (TARGET_RELOCATABLE || flag_pic > 1))
27841 fputs ("\t.long (", asm_out_file);
27842 output_addr_const (asm_out_file, symbol);
27843 fputs (")@fixup\n", asm_out_file);
27846 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
27850 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
27852 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
27854 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
27855 ASM_OUTPUT_LABEL (file, name);
27856 fputs (DOUBLE_INT_ASM_OP, file);
27857 rs6000_output_function_entry (file, name);
27858 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
27861 fputs ("\t.size\t", file);
27862 assemble_name (file, name);
27863 fputs (",24\n\t.type\t.", file);
27864 assemble_name (file, name);
27865 fputs (",@function\n", file);
27866 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
27868 fputs ("\t.globl\t.", file);
27869 assemble_name (file, name);
27874 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
27875 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
27876 rs6000_output_function_entry (file, name);
27877 fputs (":\n", file);
27882 if (DEFAULT_ABI == ABI_V4
27883 && (TARGET_RELOCATABLE || flag_pic > 1)
27884 && !TARGET_SECURE_PLT
27885 && (!constant_pool_empty_p () || crtl->profile)
27886 && (uses_toc = uses_TOC ()))
27891 switch_to_other_text_partition ();
27892 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
27894 fprintf (file, "\t.long ");
27895 assemble_name (file, toc_label_name);
27898 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27899 assemble_name (file, buf);
27902 switch_to_other_text_partition ();
27905 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
27906 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
27908 if (TARGET_CMODEL == CMODEL_LARGE
27909 && rs6000_global_entry_point_prologue_needed_p ())
27913 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
27915 fprintf (file, "\t.quad .TOC.-");
27916 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27917 assemble_name (file, buf);
27921 if (DEFAULT_ABI == ABI_AIX)
27923 const char *desc_name, *orig_name;
27925 orig_name = (*targetm.strip_name_encoding) (name);
27926 desc_name = orig_name;
27927 while (*desc_name == '.')
27930 if (TREE_PUBLIC (decl))
27931 fprintf (file, "\t.globl %s\n", desc_name);
27933 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27934 fprintf (file, "%s:\n", desc_name);
27935 fprintf (file, "\t.long %s\n", orig_name);
27936 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
27937 fputs ("\t.long 0\n", file);
27938 fprintf (file, "\t.previous\n");
27940 ASM_OUTPUT_LABEL (file, name);
27943 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
27945 rs6000_elf_file_end (void)
27947 #ifdef HAVE_AS_GNU_ATTRIBUTE
27948 /* ??? The value emitted depends on options active at file end.
27949 Assume anyone using #pragma or attributes that might change
27950 options knows what they are doing. */
27951 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
27952 && rs6000_passes_float)
27956 if (TARGET_HARD_FLOAT)
27960 if (rs6000_passes_long_double)
27962 if (!TARGET_LONG_DOUBLE_128)
27964 else if (TARGET_IEEEQUAD)
27969 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
27971 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
27973 if (rs6000_passes_vector)
27974 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
27975 (TARGET_ALTIVEC_ABI ? 2 : 1));
27976 if (rs6000_returns_struct)
27977 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
27978 aix_struct_return ? 2 : 1);
27981 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
27982 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
27983 file_end_indicate_exec_stack ();
27986 if (flag_split_stack)
27987 file_end_indicate_split_stack ();
27991 /* We have expanded a CPU builtin, so we need to emit a reference to
27992 the special symbol that LIBC uses to declare that it supports the
27993 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 features in the TCB. */
27994 switch_to_section (data_section);
27995 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
27996 fprintf (asm_out_file, "\t%s %s\n",
27997 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
28004 #ifndef HAVE_XCOFF_DWARF_EXTRAS
28005 #define HAVE_XCOFF_DWARF_EXTRAS 0
28008 static enum unwind_info_type
28009 rs6000_xcoff_debug_unwind_info (void)
28015 rs6000_xcoff_asm_output_anchor (rtx symbol)
28019 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
28020 SYMBOL_REF_BLOCK_OFFSET (symbol));
28021 fprintf (asm_out_file, "%s", SET_ASM_OP);
28022 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
28023 fprintf (asm_out_file, ",");
28024 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
28025 fprintf (asm_out_file, "\n");
28029 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
28031 fputs (GLOBAL_ASM_OP, stream);
28032 RS6000_OUTPUT_BASENAME (stream, name);
28033 putc ('\n', stream);
28036 /* A get_unnamed_section callback, used for read-only sections. DIRECTIVE
28037 points to the section string variable. */
28040 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
28042 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
28043 *(const char *const *) directive,
28044 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
28047 /* Likewise for read-write sections. */
28050 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
28052 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
28053 *(const char *const *) directive,
28054 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
28058 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
28060 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
28061 *(const char *const *) directive,
28062 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
28065 /* A get_unnamed_section callback, used for switching to toc_section. */
28068 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
28070 if (TARGET_MINIMAL_TOC)
28072 /* toc_section is always selected at least once from
28073 rs6000_xcoff_file_start, so this is guaranteed to be defined
28074 exactly once in each file. */
28075 if (!toc_initialized)
28077 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
28078 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
28079 toc_initialized = 1;
28081 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
28082 (TARGET_32BIT ? "" : ",3"));
28085 fputs ("\t.toc\n", asm_out_file);
28088 /* Implement TARGET_ASM_INIT_SECTIONS. */
28091 rs6000_xcoff_asm_init_sections (void)
28093 read_only_data_section
28094 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
28095 &xcoff_read_only_section_name);
28097 private_data_section
28098 = get_unnamed_section (SECTION_WRITE,
28099 rs6000_xcoff_output_readwrite_section_asm_op,
28100 &xcoff_private_data_section_name);
28102 read_only_private_data_section
28103 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
28104 &xcoff_private_rodata_section_name);
28107 = get_unnamed_section (SECTION_TLS,
28108 rs6000_xcoff_output_tls_section_asm_op,
28109 &xcoff_tls_data_section_name);
28111 tls_private_data_section
28112 = get_unnamed_section (SECTION_TLS,
28113 rs6000_xcoff_output_tls_section_asm_op,
28114 &xcoff_private_data_section_name);
28117 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
28119 readonly_data_section = read_only_data_section;
28123 rs6000_xcoff_reloc_rw_mask (void)
28129 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
28130 tree decl ATTRIBUTE_UNUSED)
28133 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
28135 if (flags & SECTION_EXCLUDE)
28137 else if (flags & SECTION_DEBUG)
28139 fprintf (asm_out_file, "\t.dwsect %s\n", name);
28142 else if (flags & SECTION_CODE)
28144 else if (flags & SECTION_TLS)
28146 else if (flags & SECTION_WRITE)
28151 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
28152 (flags & SECTION_CODE) ? "." : "",
28153 name, suffix[smclass], flags & SECTION_ENTSIZE);
28156 #define IN_NAMED_SECTION(DECL) \
28157 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
28158 && DECL_SECTION_NAME (DECL) != NULL)
28161 rs6000_xcoff_select_section (tree decl, int reloc,
28162 unsigned HOST_WIDE_INT align)
28164 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
28165 a named section. */
28166 if (align > BIGGEST_ALIGNMENT)
28168 resolve_unique_section (decl, reloc, true);
28169 if (IN_NAMED_SECTION (decl))
28170 return get_named_section (decl, NULL, reloc);
28173 if (decl_readonly_section (decl, reloc))
28175 if (TREE_PUBLIC (decl))
28176 return read_only_data_section;
28178 return read_only_private_data_section;
28183 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
28185 if (TREE_PUBLIC (decl))
28186 return tls_data_section;
28187 else if (bss_initializer_p (decl))
28189 /* Convert to COMMON to emit in BSS. */
28190 DECL_COMMON (decl) = 1;
28191 return tls_comm_section;
28194 return tls_private_data_section;
28198 if (TREE_PUBLIC (decl))
28199 return data_section;
28201 return private_data_section;
28206 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
28210 /* Use select_section for private data and uninitialized data with
28211 alignment <= BIGGEST_ALIGNMENT. */
28212 if (!TREE_PUBLIC (decl)
28213 || DECL_COMMON (decl)
28214 || (DECL_INITIAL (decl) == NULL_TREE
28215 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
28216 || DECL_INITIAL (decl) == error_mark_node
28217 || (flag_zero_initialized_in_bss
28218 && initializer_zerop (DECL_INITIAL (decl))))
28221 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
28222 name = (*targetm.strip_name_encoding) (name);
28223 set_decl_section_name (decl, name);
28226 /* Select section for constant in constant pool.
28228 On RS/6000, all constants are in the private read-only data area.
28229 However, if this is being placed in the TOC it must be output as a
28230 toc entry. */
28233 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
28234 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
28236 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
28237 return toc_section;
28239 return read_only_private_data_section;
28242 /* Remove any trailing [DS] or the like from the symbol name. */
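/* E.g. (illustrative): "foo[DS]" strips to "foo"; a name without a
   trailing ']' is returned unchanged.  */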
28244 static const char *
28245 rs6000_xcoff_strip_name_encoding (const char *name)
28250 len = strlen (name);
28251 if (name[len - 1] == ']')
28252 return ggc_alloc_string (name, len - 4);
28257 /* Section attributes. AIX is always PIC. */
28259 static unsigned int
28260 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
28262 unsigned int align;
28263 unsigned int flags = default_section_type_flags (decl, name, reloc);
28265 /* Align to at least UNIT size. */
28266 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
28267 align = MIN_UNITS_PER_WORD;
28269 /* Increase alignment of large objects if not already stricter. */
28270 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
28271 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
28272 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
28274 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
28277 /* Output at beginning of assembler file.
28279 Initialize the section names for the RS/6000 at this point.
28281 Specify filename, including full path, to assembler.
28283 We want to go into the TOC section so at least one .toc will be emitted.
28284 Also, in order to output proper .bs/.es pairs, we need at least one static
28285 [RW] section emitted.
28287 Finally, declare mcount when profiling to make the assembler happy. */
28290 rs6000_xcoff_file_start (void)
28292 rs6000_gen_section_name (&xcoff_bss_section_name,
28293 main_input_filename, ".bss_");
28294 rs6000_gen_section_name (&xcoff_private_data_section_name,
28295 main_input_filename, ".rw_");
28296 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
28297 main_input_filename, ".rop_");
28298 rs6000_gen_section_name (&xcoff_read_only_section_name,
28299 main_input_filename, ".ro_");
28300 rs6000_gen_section_name (&xcoff_tls_data_section_name,
28301 main_input_filename, ".tls_");
28302 rs6000_gen_section_name (&xcoff_tbss_section_name,
28303 main_input_filename, ".tbss_[UL]");
28305 fputs ("\t.file\t", asm_out_file);
28306 output_quoted_string (asm_out_file, main_input_filename);
28307 fputc ('\n', asm_out_file);
28308 if (write_symbols != NO_DEBUG)
28309 switch_to_section (private_data_section);
28310 switch_to_section (toc_section);
28311 switch_to_section (text_section);
28313 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
28314 rs6000_file_start ();
28317 /* Output at end of assembler file.
28318 On the RS/6000, referencing data should automatically pull in text. */
28321 rs6000_xcoff_file_end (void)
28323 switch_to_section (text_section);
28324 fputs ("_section_.text:\n", asm_out_file);
28325 switch_to_section (data_section);
28326 fputs (TARGET_32BIT
28327 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
28331 struct declare_alias_data
28334 bool function_descriptor;
28337 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
28340 rs6000_declare_alias (struct symtab_node *n, void *d)
28342 struct declare_alias_data *data = (struct declare_alias_data *)d;
28343 /* Main symbol is output specially, because varasm machinery does part of
28344 the job for us - we do not need to declare .globl/lglobs and such. */
28345 if (!n->alias || n->weakref)
28348 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
28351 /* Prevent assemble_alias from trying to use the .set pseudo operation,
28352 which does not behave as expected by the middle-end. */
28353 TREE_ASM_WRITTEN (n->decl) = true;
28355 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
28356 char *buffer = (char *) alloca (strlen (name) + 2);
28358 int dollar_inside = 0;
28360 strcpy (buffer, name);
28361 p = strchr (buffer, '$');
28365 p = strchr (p + 1, '$');
28367 if (TREE_PUBLIC (n->decl))
28369 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
28371 if (dollar_inside) {
28372 if (data->function_descriptor)
28373 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
28374 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
28376 if (data->function_descriptor)
28378 fputs ("\t.globl .", data->file);
28379 RS6000_OUTPUT_BASENAME (data->file, buffer);
28380 putc ('\n', data->file);
28382 fputs ("\t.globl ", data->file);
28383 RS6000_OUTPUT_BASENAME (data->file, buffer);
28384 putc ('\n', data->file);
28386 #ifdef ASM_WEAKEN_DECL
28387 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
28388 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
28395 if (data->function_descriptor)
28396 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
28397 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
28399 if (data->function_descriptor)
28401 fputs ("\t.lglobl .", data->file);
28402 RS6000_OUTPUT_BASENAME (data->file, buffer);
28403 putc ('\n', data->file);
28405 fputs ("\t.lglobl ", data->file);
28406 RS6000_OUTPUT_BASENAME (data->file, buffer);
28407 putc ('\n', data->file);
28409 if (data->function_descriptor)
28410 fputs (".", data->file);
28411 RS6000_OUTPUT_BASENAME (data->file, buffer);
28412 fputs (":\n", data->file);
28417 #ifdef HAVE_GAS_HIDDEN
28418 /* Helper function to calculate visibility of a DECL
28419 and return the value as a const string. */
28421 static const char *
28422 rs6000_xcoff_visibility (tree decl)
28424 static const char * const visibility_types[] = {
28425 "", ",protected", ",hidden", ",internal"
28428 enum symbol_visibility vis = DECL_VISIBILITY (decl);
28429 return visibility_types[vis];
28434 /* This macro produces the initial definition of a function name.
28435 On the RS/6000, we need to place an extra '.' in the function name and
28436 output the function descriptor.
28437 Dollar signs are converted to underscores.
28439 The csect for the function will have already been created when
28440 text_section was selected. We do have to go back to that csect, however.
28442 The third and fourth parameters to the .function pseudo-op (16 and 044)
28443 are placeholders which no longer have any use.
28445 Because AIX assembler's .set command has unexpected semantics, we output
28446 all aliases as alternative labels in front of the definition. */
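/* The output below comes out roughly like this for a public 32-bit
   function "foo" with no aliases (illustrative; the ".text[PR]" csect
   name is an assumption about what function_section selects):

           .globl .foo
           .csect foo[DS]
       foo:
           .long .foo, TOC[tc0], 0
           .csect .text[PR]
       .foo:

   i.e. the descriptor csect "foo[DS]" followed by the code entry
   label ".foo".  */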
28449 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
28451 char *buffer = (char *) alloca (strlen (name) + 1);
28453 int dollar_inside = 0;
28454 struct declare_alias_data data = {file, false};
28456 strcpy (buffer, name);
28457 p = strchr (buffer, '$');
28461 p = strchr (p + 1, '$');
28463 if (TREE_PUBLIC (decl))
28465 if (!RS6000_WEAK || !DECL_WEAK (decl))
28467 if (dollar_inside) {
28468 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
28469 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
28471 fputs ("\t.globl .", file);
28472 RS6000_OUTPUT_BASENAME (file, buffer);
28473 #ifdef HAVE_GAS_HIDDEN
28474 fputs (rs6000_xcoff_visibility (decl), file);
28481 if (dollar_inside) {
28482 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
28483 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
28485 fputs ("\t.lglobl .", file);
28486 RS6000_OUTPUT_BASENAME (file, buffer);
28489 fputs ("\t.csect ", file);
28490 RS6000_OUTPUT_BASENAME (file, buffer);
28491 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
28492 RS6000_OUTPUT_BASENAME (file, buffer);
28493 fputs (":\n", file);
28494 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
28496 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
28497 RS6000_OUTPUT_BASENAME (file, buffer);
28498 fputs (", TOC[tc0], 0\n", file);
28500 switch_to_section (function_section (decl));
28502 RS6000_OUTPUT_BASENAME (file, buffer);
28503 fputs (":\n", file);
28504 data.function_descriptor = true;
28505 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
28507 if (!DECL_IGNORED_P (decl))
28509 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
28510 xcoffout_declare_function (file, decl, buffer);
28511 else if (write_symbols == DWARF2_DEBUG)
28513 name = (*targetm.strip_name_encoding) (name);
28514 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
28521 /* Output assembly language to globalize a symbol from a DECL,
28522 possibly with visibility. */
28525 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
28527 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
28528 fputs (GLOBAL_ASM_OP, stream);
28529 RS6000_OUTPUT_BASENAME (stream, name);
28530 #ifdef HAVE_GAS_HIDDEN
28531 fputs (rs6000_xcoff_visibility (decl), stream);
28533 putc ('\n', stream);
28536 /* Output assembly language to define a symbol as COMMON from a DECL,
28537 possibly with visibility. */
28540 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
28541 tree decl ATTRIBUTE_UNUSED,
28543 unsigned HOST_WIDE_INT size,
28544 unsigned HOST_WIDE_INT align)
28546 unsigned HOST_WIDE_INT align2 = 2;
28549 align2 = floor_log2 (align / BITS_PER_UNIT);
28553 fputs (COMMON_ASM_OP, stream);
28554 RS6000_OUTPUT_BASENAME (stream, name);
28557 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
28560 #ifdef HAVE_GAS_HIDDEN
28562 fputs (rs6000_xcoff_visibility (decl), stream);
28564 putc ('\n', stream);
28567 /* This macro produces the initial definition of an object (variable) name.
28568 Because AIX assembler's .set command has unexpected semantics, we output
28569 all aliases as alternative labels in front of the definition. */
28572 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
28574 struct declare_alias_data data = {file, false};
28575 RS6000_OUTPUT_BASENAME (file, name);
28576 fputs (":\n", file);
28577 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
28581 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
28584 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
28586 fputs (integer_asm_op (size, FALSE), file);
28587 assemble_name (file, label);
28588 fputs ("-$", file);
28591 /* Output a symbol offset relative to the dbase for the current object.
28592 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
28593 signed offsets.
28595 __gcc_unwind_dbase is embedded in all executables/libraries through
28596 libgcc/config/rs6000/crtdbase.S. */
28599 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
28601 fputs (integer_asm_op (size, FALSE), file);
28602 assemble_name (file, label);
28603 fputs("-__gcc_unwind_dbase", file);
28608 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
28612 const char *symname;
28614 default_encode_section_info (decl, rtl, first);
28616 /* Careful not to prod global register variables. */
28619 symbol = XEXP (rtl, 0);
28620 if (!SYMBOL_REF_P (symbol))
28623 flags = SYMBOL_REF_FLAGS (symbol);
28625 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
28626 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
28628 SYMBOL_REF_FLAGS (symbol) = flags;
28630 /* Append mapping class to extern decls. */
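/* E.g. (illustrative): an extern function "bar" is rewritten to
   "bar[DS]", an extern non-TLS variable "v" to "v[UA]".  */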
28631 symname = XSTR (symbol, 0);
28632 if (decl /* sync condition with assemble_external () */
28633 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
28634 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
28635 || TREE_CODE (decl) == FUNCTION_DECL)
28636 && symname[strlen (symname) - 1] != ']')
28638 char *newname = (char *) alloca (strlen (symname) + 5);
28639 strcpy (newname, symname);
28640 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
28641 ? "[DS]" : "[UA]"));
28642 XSTR (symbol, 0) = ggc_strdup (newname);
28645 #endif /* HAVE_AS_TLS */
28646 #endif /* TARGET_XCOFF */
28649 rs6000_asm_weaken_decl (FILE *stream, tree decl,
28650 const char *name, const char *val)
28652 fputs ("\t.weak\t", stream);
28653 RS6000_OUTPUT_BASENAME (stream, name);
28654 if (decl && TREE_CODE (decl) == FUNCTION_DECL
28655 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
28658 fputs ("[DS]", stream);
28659 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
28661 fputs (rs6000_xcoff_visibility (decl), stream);
28663 fputs ("\n\t.weak\t.", stream);
28664 RS6000_OUTPUT_BASENAME (stream, name);
28666 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
28668 fputs (rs6000_xcoff_visibility (decl), stream);
28670 fputc ('\n', stream);
28673 #ifdef ASM_OUTPUT_DEF
28674 ASM_OUTPUT_DEF (stream, name, val);
28676 if (decl && TREE_CODE (decl) == FUNCTION_DECL
28677 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
28679 fputs ("\t.set\t.", stream);
28680 RS6000_OUTPUT_BASENAME (stream, name);
28681 fputs (",.", stream);
28682 RS6000_OUTPUT_BASENAME (stream, val);
28683 fputc ('\n', stream);
28689 /* Return true if INSN should not be copied. */
28692 rs6000_cannot_copy_insn_p (rtx_insn *insn)
28694 return recog_memoized (insn) >= 0
28695 && get_attr_cannot_copy (insn);
28698 /* Compute a (partial) cost for rtx X. Return true if the complete
28699 cost has been computed, and false if subexpressions should be
28700 scanned. In either case, *TOTAL contains the cost result. */
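/* For example (illustrative): in (plus (reg) (const_int 12)) the
   constant satisfies constraint "I" (a signed 16-bit immediate), so the
   CONST_INT case below reports it as free -- it folds into the addi.  */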
28703 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
28704 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
28706 int code = GET_CODE (x);
28710 /* On the RS/6000, if it is valid in the insn, it is free. */
28712 if (((outer_code == SET
28713 || outer_code == PLUS
28714 || outer_code == MINUS)
28715 && (satisfies_constraint_I (x)
28716 || satisfies_constraint_L (x)))
28717 || (outer_code == AND
28718 && (satisfies_constraint_K (x)
28720 ? satisfies_constraint_L (x)
28721 : satisfies_constraint_J (x))))
28722 || ((outer_code == IOR || outer_code == XOR)
28723 && (satisfies_constraint_K (x)
28725 ? satisfies_constraint_L (x)
28726 : satisfies_constraint_J (x))))
28727 || outer_code == ASHIFT
28728 || outer_code == ASHIFTRT
28729 || outer_code == LSHIFTRT
28730 || outer_code == ROTATE
28731 || outer_code == ROTATERT
28732 || outer_code == ZERO_EXTRACT
28733 || (outer_code == MULT
28734 && satisfies_constraint_I (x))
28735 || ((outer_code == DIV || outer_code == UDIV
28736 || outer_code == MOD || outer_code == UMOD)
28737 && exact_log2 (INTVAL (x)) >= 0)
28738 || (outer_code == COMPARE
28739 && (satisfies_constraint_I (x)
28740 || satisfies_constraint_K (x)))
28741 || ((outer_code == EQ || outer_code == NE)
28742 && (satisfies_constraint_I (x)
28743 || satisfies_constraint_K (x)
28745 ? satisfies_constraint_L (x)
28746 : satisfies_constraint_J (x))))
28747 || (outer_code == GTU
28748 && satisfies_constraint_I (x))
28749 || (outer_code == LTU
28750 && satisfies_constraint_P (x)))
28755 else if ((outer_code == PLUS
28756 && reg_or_add_cint_operand (x, VOIDmode))
28757 || (outer_code == MINUS
28758 && reg_or_sub_cint_operand (x, VOIDmode))
28759 || ((outer_code == SET
28760 || outer_code == IOR
28761 || outer_code == XOR)
28763 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
28765 *total = COSTS_N_INSNS (1);
28771 case CONST_WIDE_INT:
28775 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
28779 /* When optimizing for size, MEM should be slightly more expensive
28780 than generating the address, e.g., (plus (reg) (const)).
28781 L1 cache latency is about two instructions. */
28782 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
28783 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
28784 *total += COSTS_N_INSNS (100);
28793 if (FLOAT_MODE_P (mode))
28794 *total = rs6000_cost->fp;
28796 *total = COSTS_N_INSNS (1);
28800 if (CONST_INT_P (XEXP (x, 1))
28801 && satisfies_constraint_I (XEXP (x, 1)))
28803 if (INTVAL (XEXP (x, 1)) >= -256
28804 && INTVAL (XEXP (x, 1)) <= 255)
28805 *total = rs6000_cost->mulsi_const9;
28807 *total = rs6000_cost->mulsi_const;
28809 else if (mode == SFmode)
28810 *total = rs6000_cost->fp;
28811 else if (FLOAT_MODE_P (mode))
28812 *total = rs6000_cost->dmul;
28813 else if (mode == DImode)
28814 *total = rs6000_cost->muldi;
28816 *total = rs6000_cost->mulsi;
28820 if (mode == SFmode)
28821 *total = rs6000_cost->fp;
28823 *total = rs6000_cost->dmul;
28828 if (FLOAT_MODE_P (mode))
28830 *total = mode == DFmode ? rs6000_cost->ddiv
28831 : rs6000_cost->sdiv;
28838 if (CONST_INT_P (XEXP (x, 1))
28839 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
28841 if (code == DIV || code == MOD)
28843 *total = COSTS_N_INSNS (2);
28846 *total = COSTS_N_INSNS (1);
28850 if (GET_MODE (XEXP (x, 1)) == DImode)
28851 *total = rs6000_cost->divdi;
28853 *total = rs6000_cost->divsi;
28855 /* Add in shift and subtract for MOD unless we have a mod instruction. */
28856 if (!TARGET_MODULO && (code == MOD || code == UMOD))
28857 *total += COSTS_N_INSNS (2);
28861 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
28865 *total = COSTS_N_INSNS (4);
28869 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
28873 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
28877 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
28880 *total = COSTS_N_INSNS (1);
28884 if (CONST_INT_P (XEXP (x, 1)))
28886 rtx left = XEXP (x, 0);
28887 rtx_code left_code = GET_CODE (left);
28889 /* rotate-and-mask: 1 insn. */
28890 if ((left_code == ROTATE
28891 || left_code == ASHIFT
28892 || left_code == LSHIFTRT)
28893 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
28895 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
28896 if (!CONST_INT_P (XEXP (left, 1)))
28897 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
28898 *total += COSTS_N_INSNS (1);
28902 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
28903 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
28904 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
28905 || (val & 0xffff) == val
28906 || (val & 0xffff0000) == val
28907 || ((val & 0xffff) == 0 && mode == SImode))
28909 *total = rtx_cost (left, mode, AND, 0, speed);
28910 *total += COSTS_N_INSNS (1);
28915 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
28917 *total = rtx_cost (left, mode, AND, 0, speed);
28918 *total += COSTS_N_INSNS (2);
28923 *total = COSTS_N_INSNS (1);
28928 *total = COSTS_N_INSNS (1);
28934 *total = COSTS_N_INSNS (1);
28938 /* The EXTSWSLI instruction is a combined instruction, so don't count
28939 the sign extend and the shift separately within the insn. */
28940 if (TARGET_EXTSWSLI && mode == DImode
28941 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
28942 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
28953 /* Handle mul_highpart. */
28954 if (outer_code == TRUNCATE
28955 && GET_CODE (XEXP (x, 0)) == MULT)
28957 if (mode == DImode)
28958 *total = rs6000_cost->muldi;
28960 *total = rs6000_cost->mulsi;
28963 else if (outer_code == AND)
28966 *total = COSTS_N_INSNS (1);
28971 if (MEM_P (XEXP (x, 0)))
28974 *total = COSTS_N_INSNS (1);
28980 if (!FLOAT_MODE_P (mode))
28982 *total = COSTS_N_INSNS (1);
28988 case UNSIGNED_FLOAT:
28991 case FLOAT_TRUNCATE:
28992 *total = rs6000_cost->fp;
28996 if (mode == DFmode)
28997 *total = rs6000_cost->sfdf_convert;
28999 *total = rs6000_cost->fp;
29003 switch (XINT (x, 1))
29006 *total = rs6000_cost->fp;
29018 *total = COSTS_N_INSNS (1);
29021 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
29023 *total = rs6000_cost->fp;
29032 /* Carry bit requires mode == Pmode.
29033 NEG or PLUS already counted so only add one. */
29035 && (outer_code == NEG || outer_code == PLUS))
29037 *total = COSTS_N_INSNS (1);
29045 if (outer_code == SET)
29047 if (XEXP (x, 1) == const0_rtx)
29049 *total = COSTS_N_INSNS (2);
29054 *total = COSTS_N_INSNS (3);
29059 if (outer_code == COMPARE)
29073 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
29076 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
29077 int opno, int *total, bool speed)
29079 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
29082 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
29083 "opno = %d, total = %d, speed = %s, x:\n",
29084 ret ? "complete" : "scan inner",
29085 GET_MODE_NAME (mode),
29086 GET_RTX_NAME (outer_code),
29089 speed ? "true" : "false");
29097 rs6000_insn_cost (rtx_insn *insn, bool speed)
29099 if (recog_memoized (insn) < 0)
29103 return get_attr_length (insn);
29105 int cost = get_attr_cost (insn);
29109 int n = get_attr_length (insn) / 4;
29110 enum attr_type type = get_attr_type (insn);
29117 cost = COSTS_N_INSNS (n + 1);
29121 switch (get_attr_size (insn))
29124 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
29127 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
29130 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
29133 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
29136 gcc_unreachable ();
29140 switch (get_attr_size (insn))
29143 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
29146 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
29149 gcc_unreachable ();
29154 cost = n * rs6000_cost->fp;
29157 cost = n * rs6000_cost->dmul;
29160 cost = n * rs6000_cost->sdiv;
29163 cost = n * rs6000_cost->ddiv;
29170 cost = COSTS_N_INSNS (n + 2);
29174 cost = COSTS_N_INSNS (n);
29180 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
29183 rs6000_debug_address_cost (rtx x, machine_mode mode,
29184 addr_space_t as, bool speed)
29186 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
29188 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
29189 ret, speed ? "true" : "false");
29196 /* A C expression returning the cost of moving data from a register of class
29197 CLASS1 to one of CLASS2. */
29200 rs6000_register_move_cost (machine_mode mode,
29201 reg_class_t from, reg_class_t to)
29204 reg_class_t rclass;
29206 if (TARGET_DEBUG_COST)
29209 /* If we have VSX, we can easily move between FPR or Altivec registers,
29210 otherwise we can only easily move within classes.
29211 Do this first so we give best-case answers for union classes
29212 containing both gprs and vsx regs. */
29213 HARD_REG_SET to_vsx, from_vsx;
29214 COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]);
29215 AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]);
29216 COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]);
29217 AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]);
29218 if (!hard_reg_set_empty_p (to_vsx)
29219 && !hard_reg_set_empty_p (from_vsx)
29221 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
29223 int reg = FIRST_FPR_REGNO;
29225 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
29226 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
29227 reg = FIRST_ALTIVEC_REGNO;
29228 ret = 2 * hard_regno_nregs (reg, mode);
29231 /* Moves from/to GENERAL_REGS. */
29232 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
29233 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
29235 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
29237 if (TARGET_DIRECT_MOVE)
29239 /* Keep the cost for direct moves above that for within
29240 a register class even if the actual processor cost is
29241 comparable. We do this because a direct move insn
29242 can't be a nop, whereas with ideal register
29243 allocation a move within the same class might turn
29244 out to be a nop. */
29245 if (rs6000_tune == PROCESSOR_POWER9
29246 || rs6000_tune == PROCESSOR_FUTURE)
29247 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29249 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29250 /* SFmode requires a conversion when moving between gprs and vsx registers. */
29252 if (mode == SFmode)
29256 ret = (rs6000_memory_move_cost (mode, rclass, false)
29257 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
29260 /* It's more expensive to move CR_REGS than CR0_REGS because of the shift. */
29262 else if (rclass == CR_REGS)
29265 /* For those processors that have slow LR/CTR moves, make them more
29266 expensive than memory in order to bias spills to memory. */
29267 else if ((rs6000_tune == PROCESSOR_POWER6
29268 || rs6000_tune == PROCESSOR_POWER7
29269 || rs6000_tune == PROCESSOR_POWER8
29270 || rs6000_tune == PROCESSOR_POWER9)
29271 && reg_class_subset_p (rclass, SPECIAL_REGS))
29272 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29275 /* A move will cost one instruction per GPR moved. */
29276 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
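	/* For example, TImode occupies two GPRs on a 64-bit target, so the
	   move costs 2 * 2 = 4, i.e. a baseline of 2 per register moved.  */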
29279 /* Everything else has to go through GENERAL_REGS. */
29281 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
29282 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
29284 if (TARGET_DEBUG_COST)
29286 if (dbg_cost_ctrl == 1)
29288 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
29289 ret, GET_MODE_NAME (mode), reg_class_names[from],
29290 reg_class_names[to]);
29297 /* A C expression returning the cost of moving data of MODE from a register to or from memory. */
29301 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
29302 bool in ATTRIBUTE_UNUSED)
29306 if (TARGET_DEBUG_COST)
29309 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
29310 ret = 4 * hard_regno_nregs (0, mode);
29311 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
29312 || reg_classes_intersect_p (rclass, VSX_REGS)))
29313 ret = 4 * hard_regno_nregs (32, mode);
29314 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
29315 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
29317 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
29319 if (TARGET_DEBUG_COST)
29321 if (dbg_cost_ctrl == 1)
29323 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
29324 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
29331 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
29333 The register allocator chooses GEN_OR_VSX_REGS for the allocno
29334 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
29335 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
29336 move cost between GENERAL_REGS and VSX_REGS low.
29338 It might seem reasonable to use a union class. After all, if usage
29339 of vsr is low and gpr high, it might make sense to spill gpr to vsr
29340 rather than memory. However, in cases where register pressure of
29341 both is high, like the cactus_adm spec test, allowing
29342 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
29343 the first scheduling pass. This is partly due to an allocno of
29344 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
29345 class, which gives too high a pressure for GENERAL_REGS and too low
29346 for VSX_REGS. So, force a choice of the subclass here.
29348 The best class is also the union if GENERAL_REGS and VSX_REGS have
29349 the same cost. In that case we do use GEN_OR_VSX_REGS as the
29350 allocno class, since trying to narrow down the class by regno mode
29351 is prone to error. For example, SImode is allowed in VSX regs and
29352 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
29353 it would be wrong to choose an allocno of GENERAL_REGS based on SImode. */
29357 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
29358 reg_class_t allocno_class,
29359 reg_class_t best_class)
29361 switch (allocno_class)
29363 case GEN_OR_VSX_REGS:
29364 /* best_class must be a subset of allocno_class. */
29365 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
29366 || best_class == GEN_OR_FLOAT_REGS
29367 || best_class == VSX_REGS
29368 || best_class == ALTIVEC_REGS
29369 || best_class == FLOAT_REGS
29370 || best_class == GENERAL_REGS
29371 || best_class == BASE_REGS);
29372 /* Use best_class but choose wider classes when copying from the
29373 wider class to best_class is cheap. This mimics IRA choice
29374 of allocno class. */
29375 if (best_class == BASE_REGS)
29376 return GENERAL_REGS;
29378 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
29386 return allocno_class;
29389 /* Returns a code for a target-specific builtin that implements
29390 the reciprocal of the function, or NULL_TREE if not available. */
29393 rs6000_builtin_reciprocal (tree fndecl)
29395 switch (DECL_FUNCTION_CODE (fndecl))
29397 case VSX_BUILTIN_XVSQRTDP:
29398 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
29401 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
29403 case VSX_BUILTIN_XVSQRTSP:
29404 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
29407 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
29414 /* Load up a constant. If the mode is a vector mode, splat the value across
29415 all of the vector elements. */
29418 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
29422 if (mode == SFmode || mode == DFmode)
29424 rtx d = const_double_from_real_value (dconst, mode);
29425 reg = force_reg (mode, d);
29427 else if (mode == V4SFmode)
29429 rtx d = const_double_from_real_value (dconst, SFmode);
29430 rtvec v = gen_rtvec (4, d, d, d, d);
29431 reg = gen_reg_rtx (mode);
29432 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
29434 else if (mode == V2DFmode)
29436 rtx d = const_double_from_real_value (dconst, DFmode);
29437 rtvec v = gen_rtvec (2, d, d);
29438 reg = gen_reg_rtx (mode);
29439 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
29442 gcc_unreachable ();
29447 /* Generate an FMA instruction. */
29450 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
29452 machine_mode mode = GET_MODE (target);
29455 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
29456 gcc_assert (dst != NULL);
29459 emit_move_insn (target, dst);
29462 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
29465 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
29467 machine_mode mode = GET_MODE (dst);
29470 /* This is a tad more complicated, since the fnma_optab is for
29471 a different expression: fma(-m1, m2, a), which is the same
29472 thing except in the case of signed zeros.
29474 Fortunately we know that if FMA is supported that FNMSUB is
29475 also supported in the ISA. Just expand it directly. */
29477 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
29479 r = gen_rtx_NEG (mode, a);
29480 r = gen_rtx_FMA (mode, m1, m2, r);
29481 r = gen_rtx_NEG (mode, r);
29482 emit_insn (gen_rtx_SET (dst, r));
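/* A minimal host-side sketch (not compiled into GCC) of the identity
   used above, assuming C99 fma(): FNMSUB computes a - m1*m2 in a
   single rounding as -fma(m1, m2, -a).  fma(-m1, m2, a) computes the
   same value except for the sign of an exact zero result, which is
   why we do not go through fnma_optab here.  */
#if 0
#include <math.h>

static double
fnmsub_ref (double m1, double m2, double a)
{
  return -fma (m1, m2, -a);	/* == a - m1*m2, one rounding.  */
}
#endif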
29485 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
29486 add a reg_note saying that this was a division. Support both scalar and
29487 vector divide. Assumes no trapping math and finite arguments. */
29490 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
29492 machine_mode mode = GET_MODE (dst);
29493 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
29496 /* Low precision estimates guarantee 5 bits of accuracy. High
29497 precision estimates guarantee 14 bits of accuracy. SFmode
29498 requires 23 bits of accuracy. DFmode requires 52 bits of
29499 accuracy. Each pass at least doubles the accuracy, leading
29500 to the following. */
29501 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
29502 if (mode == DFmode || mode == V2DFmode)
29505 enum insn_code code = optab_handler (smul_optab, mode);
29506 insn_gen_fn gen_mul = GEN_FCN (code);
29508 gcc_assert (code != CODE_FOR_nothing);
29510 one = rs6000_load_constant_and_splat (mode, dconst1);
29512 /* x0 = 1./d estimate */
29513 x0 = gen_reg_rtx (mode);
29514 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
29517 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
29520 /* e0 = 1. - d * x0 */
29521 e0 = gen_reg_rtx (mode);
29522 rs6000_emit_nmsub (e0, d, x0, one);
29524 /* x1 = x0 + e0 * x0 */
29525 x1 = gen_reg_rtx (mode);
29526 rs6000_emit_madd (x1, e0, x0, x0);
29528 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
29529 ++i, xprev = xnext, eprev = enext) {
29531 /* enext = eprev * eprev */
29532 enext = gen_reg_rtx (mode);
29533 emit_insn (gen_mul (enext, eprev, eprev));
29535 /* xnext = xprev + enext * xprev */
29536 xnext = gen_reg_rtx (mode);
29537 rs6000_emit_madd (xnext, enext, xprev, xprev);
29543 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
29545 /* u = n * xprev */
29546 u = gen_reg_rtx (mode);
29547 emit_insn (gen_mul (u, n, xprev));
29549 /* v = n - (d * u) */
29550 v = gen_reg_rtx (mode);
29551 rs6000_emit_nmsub (v, d, u, n);
29553 /* dst = (v * xprev) + u */
29554 rs6000_emit_madd (dst, v, xprev, u);
29557 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
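/* A hedged host-side model (not compiled into GCC) of the sequence
   emitted above, written with plain doubles; "1.0 / d" stands in for
   the low-precision hardware reciprocal estimate (fre/xsredp).  */
#if 0
static double
swdiv_ref (double n, double d, int passes)
{
  double x = 1.0 / d;		/* x0 = 1/d estimate */
  double e = 1.0 - d * x;	/* e0 = 1 - d*x0 */
  double u, v;
  int i;

  x = x + e * x;		/* x1 = x0 + e0*x0 */
  for (i = 0; i < passes - 2; i++)
    {
      e = e * e;		/* the error term squares each pass */
      x = x + e * x;		/* xnext = xprev + enext*xprev */
    }
  u = n * x;			/* u = n * xprev */
  v = n - d * u;		/* v = n - d*u */
  return u + v * x;		/* dst = v*xprev + u */
}
#endif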
29560 /* Goldschmidt's Algorithm for single/double-precision floating point
29561 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
29564 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
29566 machine_mode mode = GET_MODE (src);
29567 rtx e = gen_reg_rtx (mode);
29568 rtx g = gen_reg_rtx (mode);
29569 rtx h = gen_reg_rtx (mode);
29571 /* Low precision estimates guarantee 5 bits of accuracy. High
29572 precision estimates guarantee 14 bits of accuracy. SFmode
29573 requires 23 bits of accuracy. DFmode requires 52 bits of
29574 accuracy. Each pass at least doubles the accuracy, leading
29575 to the following. */
29576 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
29577 if (mode == DFmode || mode == V2DFmode)
29582 enum insn_code code = optab_handler (smul_optab, mode);
29583 insn_gen_fn gen_mul = GEN_FCN (code);
29585 gcc_assert (code != CODE_FOR_nothing);
29587 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
29589 /* e = rsqrt estimate */
29590 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
29593 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
29596 rtx zero = force_reg (mode, CONST0_RTX (mode));
29598 if (mode == SFmode)
29600 rtx target = emit_conditional_move (e, GT, src, zero, mode,
29603 emit_move_insn (e, target);
29607 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
29608 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
29612 /* g = sqrt estimate. */
29613 emit_insn (gen_mul (g, e, src));
29614 /* h = 1/(2*sqrt) estimate. */
29615 emit_insn (gen_mul (h, e, mhalf));
29621 rtx t = gen_reg_rtx (mode);
29622 rs6000_emit_nmsub (t, g, h, mhalf);
29623 /* Apply correction directly to 1/rsqrt estimate. */
29624 rs6000_emit_madd (dst, e, t, e);
29628 for (i = 0; i < passes; i++)
29630 rtx t1 = gen_reg_rtx (mode);
29631 rtx g1 = gen_reg_rtx (mode);
29632 rtx h1 = gen_reg_rtx (mode);
29634 rs6000_emit_nmsub (t1, g, h, mhalf);
29635 rs6000_emit_madd (g1, g, t1, g);
29636 rs6000_emit_madd (h1, h, t1, h);
29641 /* Multiply by 2 for 1/rsqrt. */
29642 emit_insn (gen_add3_insn (dst, h, h));
29647 rtx t = gen_reg_rtx (mode);
29648 rs6000_emit_nmsub (t, g, h, mhalf);
29649 rs6000_emit_madd (dst, g, t, g);
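/* A hedged host-side model (not compiled into GCC) of the Goldschmidt
   iteration above: g converges to sqrt(src) and h to 1/(2*sqrt(src)),
   with the residual t = 1/2 - g*h driving both corrections.  The
   sqrt() call stands in for the hardware rsqrt estimate (frsqrte).  */
#if 0
#include <math.h>

static double
swsqrt_ref (double src, int recip, int passes)
{
  double e = 1.0 / sqrt (src);	/* rsqrt estimate */
  double g = e * src;		/* ~ sqrt(src) */
  double h = e * 0.5;		/* ~ 1/(2*sqrt(src)) */
  int i;

  for (i = 0; i < passes; i++)
    {
      double t = 0.5 - g * h;	/* residual; 0 when g*h == 1/2 */
      g = g + t * g;
      h = h + t * h;
    }
  return recip ? h + h : g;	/* 2h ~ 1/sqrt(src), g ~ sqrt(src) */
}
#endif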
29655 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
29656 (Power7) targets. DST is the target, and SRC is the argument operand. */
29659 rs6000_emit_popcount (rtx dst, rtx src)
29661 machine_mode mode = GET_MODE (dst);
29664 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
29665 if (TARGET_POPCNTD)
29667 if (mode == SImode)
29668 emit_insn (gen_popcntdsi2 (dst, src));
29670 emit_insn (gen_popcntddi2 (dst, src));
29674 tmp1 = gen_reg_rtx (mode);
29676 if (mode == SImode)
29678 emit_insn (gen_popcntbsi2 (tmp1, src));
29679 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
29681 tmp2 = force_reg (SImode, tmp2);
29682 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
29686 emit_insn (gen_popcntbdi2 (tmp1, src));
29687 tmp2 = expand_mult (DImode, tmp1,
29688 GEN_INT ((HOST_WIDE_INT)
29689 0x01010101 << 32 | 0x01010101),
29691 tmp2 = force_reg (DImode, tmp2);
29692 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
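/* A hedged C model (not compiled into GCC) of the popcntb-based
   expansion above: popcntb leaves a per-byte population count in each
   byte; multiplying by 0x01...01 accumulates all the byte counts into
   the most significant byte, which the final right shift extracts.  */
#if 0
static unsigned int
popcount32_ref (unsigned int x)
{
  unsigned int bytes = 0;
  int i;

  /* Model of the popcntb instruction.  */
  for (i = 0; i < 4; i++)
    bytes |= (unsigned int) __builtin_popcount ((x >> (8 * i)) & 0xff)
	     << (8 * i);
  return (bytes * 0x01010101u) >> 24;	/* sum of the byte counts */
}
#endif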
29697 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
29698 target, and SRC is the argument operand. */
29701 rs6000_emit_parity (rtx dst, rtx src)
29703 machine_mode mode = GET_MODE (dst);
29706 tmp = gen_reg_rtx (mode);
29708 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
29711 if (mode == SImode)
29713 emit_insn (gen_popcntbsi2 (tmp, src));
29714 emit_insn (gen_paritysi2_cmpb (dst, tmp));
29718 emit_insn (gen_popcntbdi2 (tmp, src));
29719 emit_insn (gen_paritydi2_cmpb (dst, tmp));
29724 if (mode == SImode)
29726 /* Is mult+shift >= shift+xor+shift+xor? */
29727 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
29729 rtx tmp1, tmp2, tmp3, tmp4;
29731 tmp1 = gen_reg_rtx (SImode);
29732 emit_insn (gen_popcntbsi2 (tmp1, src));
29734 tmp2 = gen_reg_rtx (SImode);
29735 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
29736 tmp3 = gen_reg_rtx (SImode);
29737 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
29739 tmp4 = gen_reg_rtx (SImode);
29740 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
29741 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
29744 rs6000_emit_popcount (tmp, src);
29745 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
29749 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
29750 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
29752 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
29754 tmp1 = gen_reg_rtx (DImode);
29755 emit_insn (gen_popcntbdi2 (tmp1, src));
29757 tmp2 = gen_reg_rtx (DImode);
29758 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
29759 tmp3 = gen_reg_rtx (DImode);
29760 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
29762 tmp4 = gen_reg_rtx (DImode);
29763 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
29764 tmp5 = gen_reg_rtx (DImode);
29765 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
29767 tmp6 = gen_reg_rtx (DImode);
29768 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
29769 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
29772 rs6000_emit_popcount (tmp, src);
29773 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
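/* A hedged C model (not compiled into GCC) of the shift/xor fold used
   above when the multiply is slow: xor-ing the per-byte counts
   together preserves the low bit of their sum, which is the parity.  */
#if 0
static unsigned int
parity32_ref (unsigned int x)
{
  unsigned int c = 0;
  int i;

  /* Model of the popcntb instruction.  */
  for (i = 0; i < 4; i++)
    c |= (unsigned int) __builtin_popcount ((x >> (8 * i)) & 0xff)
	 << (8 * i);
  c ^= c >> 16;			/* fold halfwords */
  c ^= c >> 8;			/* fold bytes */
  return c & 1;
}
#endif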
29777 /* Expand an Altivec constant permutation for little endian mode.
29778 OP0 and OP1 are the input vectors and TARGET is the output vector.
29779 SEL specifies the constant permutation vector.
29781 There are two issues: First, the two input operands must be
29782 swapped so that together they form a double-wide array in LE
29783 order. Second, the vperm instruction has surprising behavior
29784 in LE mode: it interprets the elements of the source vectors
29785 in BE mode ("left to right") and interprets the elements of
29786 the destination vector in LE mode ("right to left"). To
29787 correct for this, we must subtract each element of the permute
29788 control vector from 31.
29790 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
29791 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
29792 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
29793 serve as the permute control vector. Then, in BE mode,
29797 the instruction "vperm 9,10,11,12" places the desired result in vr9. However, in LE mode the
29798 vector contents will be
29800 vr10 = 00000003 00000002 00000001 00000000
29801 vr11 = 00000007 00000006 00000005 00000004
29803 The result of the vperm using the same permute control vector is
29805 vr9 = 05000000 07000000 01000000 03000000
29807 That is, the leftmost 4 bytes of vr10 are interpreted as the
29808 source for the rightmost 4 bytes of vr9, and so on.
29810 If we change the permute control vector to
29812 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
29820 then "vperm 9,11,10,12" yields the desired vr9 = 00000006 00000004 00000002 00000000. */
29823 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
29824 const vec_perm_indices &sel)
29828 rtx constv, unspec;
29830 /* Unpack and adjust the constant selector. */
29831 for (i = 0; i < 16; ++i)
29833 unsigned int elt = 31 - (sel[i] & 31);
29834 perm[i] = GEN_INT (elt);
29837 /* Expand to a permute, swapping the inputs and using the
29838 adjusted selector. */
29840 op0 = force_reg (V16QImode, op0);
29842 op1 = force_reg (V16QImode, op1);
29844 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
29845 constv = force_reg (V16QImode, constv);
29846 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
29848 if (!REG_P (target))
29850 rtx tmp = gen_reg_rtx (V16QImode);
29851 emit_move_insn (tmp, unspec);
29855 emit_move_insn (target, unspec);
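/* A hedged host-side model (not compiled into GCC) of the selector
   adjustment performed above: with the two inputs swapped, replacing
   each selector element SEL[i] by 31 - SEL[i] converts vperm's
   big-endian element numbering into the little-endian result.  */
#if 0
static void
adjust_vperm_sel_le (const unsigned char sel[16], unsigned char out[16])
{
  int i;
  for (i = 0; i < 16; i++)
    out[i] = 31 - (sel[i] & 31);
}
#endif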
29858 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
29859 permute control vector. But here it's not a constant, so we must
29860 generate a vector NAND or NOR to do the adjustment. */
29863 altivec_expand_vec_perm_le (rtx operands[4])
29865 rtx notx, iorx, unspec;
29866 rtx target = operands[0];
29867 rtx op0 = operands[1];
29868 rtx op1 = operands[2];
29869 rtx sel = operands[3];
29871 rtx norreg = gen_reg_rtx (V16QImode);
29872 machine_mode mode = GET_MODE (target);
29874 /* Get everything in regs so the pattern matches. */
29876 op0 = force_reg (mode, op0);
29878 op1 = force_reg (mode, op1);
29880 sel = force_reg (V16QImode, sel);
29881 if (!REG_P (target))
29882 tmp = gen_reg_rtx (mode);
29884 if (TARGET_P9_VECTOR)
29886 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
29891 /* Invert the selector with a VNAND if available, else a VNOR.
29892 The VNAND is preferred for future fusion opportunities. */
29893 notx = gen_rtx_NOT (V16QImode, sel);
29894 iorx = (TARGET_P8_VECTOR
29895 ? gen_rtx_IOR (V16QImode, notx, notx)
29896 : gen_rtx_AND (V16QImode, notx, notx));
29897 emit_insn (gen_rtx_SET (norreg, iorx));
29899 /* Permute with operands reversed and adjusted selector. */
29900 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
29904 /* Copy into target, possibly by way of a register. */
29905 if (!REG_P (target))
29907 emit_move_insn (tmp, unspec);
29911 emit_move_insn (target, unspec);
29914 /* Expand an Altivec constant permutation. Return true if we match
29915 an efficient implementation; false to fall back to VPERM.
29917 OP0 and OP1 are the input vectors and TARGET is the output vector.
29918 SEL specifies the constant permutation vector. */
29921 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
29922 const vec_perm_indices &sel)
29924 struct altivec_perm_insn {
29925 HOST_WIDE_INT mask;
29926 enum insn_code impl;
29927 unsigned char perm[16];
29929 static const struct altivec_perm_insn patterns[] = {
29930 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
29931 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
29932 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
29933 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
29934 { OPTION_MASK_ALTIVEC,
29935 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
29936 : CODE_FOR_altivec_vmrglb_direct),
29937 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
29938 { OPTION_MASK_ALTIVEC,
29939 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
29940 : CODE_FOR_altivec_vmrglh_direct),
29941 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
29942 { OPTION_MASK_ALTIVEC,
29943 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
29944 : CODE_FOR_altivec_vmrglw_direct),
29945 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
29946 { OPTION_MASK_ALTIVEC,
29947 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
29948 : CODE_FOR_altivec_vmrghb_direct),
29949 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
29950 { OPTION_MASK_ALTIVEC,
29951 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
29952 : CODE_FOR_altivec_vmrghh_direct),
29953 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
29954 { OPTION_MASK_ALTIVEC,
29955 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
29956 : CODE_FOR_altivec_vmrghw_direct),
29957 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
29958 { OPTION_MASK_P8_VECTOR,
29959 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
29960 : CODE_FOR_p8_vmrgow_v4sf_direct),
29961 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
29962 { OPTION_MASK_P8_VECTOR,
29963 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
29964 : CODE_FOR_p8_vmrgew_v4sf_direct),
29965 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
29968 unsigned int i, j, elt, which;
29969 unsigned char perm[16];
29973 /* Unpack the constant selector. */
29974 for (i = which = 0; i < 16; ++i)
29977 which |= (elt < 16 ? 1 : 2);
29981 /* Simplify the constant selector based on operands. */
29985 gcc_unreachable ();
29989 if (!rtx_equal_p (op0, op1))
29994 for (i = 0; i < 16; ++i)
30006 /* Look for splat patterns. */
30011 for (i = 0; i < 16; ++i)
30012 if (perm[i] != elt)
30016 if (!BYTES_BIG_ENDIAN)
30018 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
30024 for (i = 0; i < 16; i += 2)
30025 if (perm[i] != elt || perm[i + 1] != elt + 1)
30029 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
30030 x = gen_reg_rtx (V8HImode);
30031 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
30033 emit_move_insn (target, gen_lowpart (V16QImode, x));
30040 for (i = 0; i < 16; i += 4)
30042 || perm[i + 1] != elt + 1
30043 || perm[i + 2] != elt + 2
30044 || perm[i + 3] != elt + 3)
30048 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
30049 x = gen_reg_rtx (V4SImode);
30050 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
30052 emit_move_insn (target, gen_lowpart (V16QImode, x));
30058 /* Look for merge and pack patterns. */
30059 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
30063 if ((patterns[j].mask & rs6000_isa_flags) == 0)
30066 elt = patterns[j].perm[0];
30067 if (perm[0] == elt)
30069 else if (perm[0] == elt + 16)
30073 for (i = 1; i < 16; ++i)
30075 elt = patterns[j].perm[i];
30077 elt = (elt >= 16 ? elt - 16 : elt + 16);
30078 else if (one_vec && elt >= 16)
30080 if (perm[i] != elt)
30085 enum insn_code icode = patterns[j].impl;
30086 machine_mode omode = insn_data[icode].operand[0].mode;
30087 machine_mode imode = insn_data[icode].operand[1].mode;
30089 /* For little-endian, don't use vpkuwum and vpkuhum if the
30090 underlying vector type is not V4SI and V8HI, respectively.
30091 For example, using vpkuwum with a V8HI picks up the even
30092 halfwords (BE numbering) when the even halfwords (LE
30093 numbering) are what we need. */
30094 if (!BYTES_BIG_ENDIAN
30095 && icode == CODE_FOR_altivec_vpkuwum_direct
30097 && GET_MODE (op0) != V4SImode)
30099 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
30101 if (!BYTES_BIG_ENDIAN
30102 && icode == CODE_FOR_altivec_vpkuhum_direct
30104 && GET_MODE (op0) != V8HImode)
30106 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
30109 /* For little-endian, the two input operands must be swapped
30110 (or swapped back) to ensure proper right-to-left numbering of the operands. */
30112 if (swapped ^ !BYTES_BIG_ENDIAN)
30113 std::swap (op0, op1);
30114 if (imode != V16QImode)
30116 op0 = gen_lowpart (imode, op0);
30117 op1 = gen_lowpart (imode, op1);
30119 if (omode == V16QImode)
30122 x = gen_reg_rtx (omode);
30123 emit_insn (GEN_FCN (icode) (x, op0, op1));
30124 if (omode != V16QImode)
30125 emit_move_insn (target, gen_lowpart (V16QImode, x));
30130 if (!BYTES_BIG_ENDIAN)
30132 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
30139 /* Expand a VSX Permute Doubleword constant permutation.
30140 Return true if we match an efficient implementation. */
30143 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
30144 unsigned char perm0, unsigned char perm1)
30148 /* If both selectors come from the same operand, fold to single op. */
30149 if ((perm0 & 2) == (perm1 & 2))
30156 /* If both operands are equal, fold to simpler permutation. */
30157 if (rtx_equal_p (op0, op1))
30160 perm1 = (perm1 & 1) + 2;
30162 /* If the first selector comes from the second operand, swap. */
30163 else if (perm0 & 2)
30169 std::swap (op0, op1);
30171 /* If the second selector does not come from the second operand, fail. */
30172 else if ((perm1 & 2) == 0)
30176 if (target != NULL)
30178 machine_mode vmode, dmode;
30181 vmode = GET_MODE (target);
30182 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
30183 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
30184 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
30185 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
30186 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
30187 emit_insn (gen_rtx_SET (target, x));
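      /* For example, PERM0 = 0 and PERM1 = 3 selects doubleword 0 of
	 OP0 and doubleword 1 of OP1, i.e. the RTL
	   (vec_select:V2DF (vec_concat:V4DF op0 op1)
			    (parallel [(const_int 0) (const_int 3)]))
	 which matches the xxpermdi pattern.  */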
30192 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
30195 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
30196 rtx op1, const vec_perm_indices &sel)
30198 bool testing_p = !target;
30200 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
30201 if (TARGET_ALTIVEC && testing_p)
30204 /* Check for ps_merge* or xxpermdi insns. */
30205 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
30209 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
30210 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
30212 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
30216 if (TARGET_ALTIVEC)
30218 /* Force the target-independent code to lower to V16QImode. */
30219 if (vmode != V16QImode)
30221 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
30228 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
30229 OP0 and OP1 are the input vectors and TARGET is the output vector.
30230 PERM specifies the constant permutation vector. */
30233 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
30234 machine_mode vmode, const vec_perm_builder &perm)
30236 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
30238 emit_move_insn (target, x);
30241 /* Expand an extract even operation. */
30244 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
30246 machine_mode vmode = GET_MODE (target);
30247 unsigned i, nelt = GET_MODE_NUNITS (vmode);
30248 vec_perm_builder perm (nelt, nelt, 1);
30250 for (i = 0; i < nelt; i++)
30251 perm.quick_push (i * 2);
30253 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
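/* For example, with V4SI operands the selector built above is
   {0, 2, 4, 6}: the even elements of the double-wide concatenation of
   OP0 and OP1.  */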
30256 /* Expand a vector interleave operation. */
30259 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
30261 machine_mode vmode = GET_MODE (target);
30262 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
30263 vec_perm_builder perm (nelt, nelt, 1);
30265 high = (highp ? 0 : nelt / 2);
30266 for (i = 0; i < nelt / 2; i++)
30268 perm.quick_push (i + high);
30269 perm.quick_push (i + nelt + high);
30272 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
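/* For example, with V4SI operands HIGHP builds the selector
   {0, 4, 1, 5} (interleaving the first halves of OP0 and OP1), while
   !HIGHP builds {2, 6, 3, 7}.  */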
30275 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
30277 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
30279 HOST_WIDE_INT hwi_scale (scale);
30280 REAL_VALUE_TYPE r_pow;
30281 rtvec v = rtvec_alloc (2);
30283 rtx scale_vec = gen_reg_rtx (V2DFmode);
30284 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
30285 elt = const_double_from_real_value (r_pow, DFmode);
30286 RTVEC_ELT (v, 0) = elt;
30287 RTVEC_ELT (v, 1) = elt;
30288 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
30289 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
30292 /* Return an RTX representing where to find the function value of a
30293 function returning MODE. */
30295 rs6000_complex_function_value (machine_mode mode)
30297 unsigned int regno;
30299 machine_mode inner = GET_MODE_INNER (mode);
30300 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
30302 if (TARGET_FLOAT128_TYPE
30304 || (mode == TCmode && TARGET_IEEEQUAD)))
30305 regno = ALTIVEC_ARG_RETURN;
30307 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
30308 regno = FP_ARG_RETURN;
30312 regno = GP_ARG_RETURN;
30314 /* 32-bit is OK since it'll go in r3/r4. */
30315 if (TARGET_32BIT && inner_bytes >= 4)
30316 return gen_rtx_REG (mode, regno);
30319 if (inner_bytes >= 8)
30320 return gen_rtx_REG (mode, regno);
30322 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
30324 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
30325 GEN_INT (inner_bytes));
30326 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
30329 /* Return an rtx describing a return value of MODE as a PARALLEL
30330 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
30331 stride REG_STRIDE. */
30334 rs6000_parallel_return (machine_mode mode,
30335 int n_elts, machine_mode elt_mode,
30336 unsigned int regno, unsigned int reg_stride)
30338 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
30341 for (i = 0; i < n_elts; i++)
30343 rtx r = gen_rtx_REG (elt_mode, regno);
30344 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
30345 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
30346 regno += reg_stride;
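/* For example, rs6000_parallel_return (DImode, 2, SImode,
   GP_ARG_RETURN, 1), as used for the 32-bit -mpowerpc64 case below,
   builds (with GP_ARG_RETURN being r3)
     (parallel [(expr_list (reg:SI 3) (const_int 0))
		(expr_list (reg:SI 4) (const_int 4))])
   i.e. the value lives in r3/r4 at byte offsets 0 and 4.  */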
30352 /* Target hook for TARGET_FUNCTION_VALUE.
30354 An integer value is in r3 and a floating-point value is in fp1,
30355 unless -msoft-float. */
30358 rs6000_function_value (const_tree valtype,
30359 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
30360 bool outgoing ATTRIBUTE_UNUSED)
30363 unsigned int regno;
30364 machine_mode elt_mode;
30367 /* Special handling for structs in darwin64. */
30369 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
30371 CUMULATIVE_ARGS valcum;
30375 valcum.fregno = FP_ARG_MIN_REG;
30376 valcum.vregno = ALTIVEC_ARG_MIN_REG;
30377 /* Do a trial code generation as if this were going to be passed as
30378 an argument; if any part goes in memory, we return NULL. */
30379 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
30382 /* Otherwise fall through to standard ABI rules. */
30385 mode = TYPE_MODE (valtype);
30387 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
30388 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
30390 int first_reg, n_regs;
30392 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
30394 /* _Decimal128 must use even/odd register pairs. */
30395 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
30396 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
30400 first_reg = ALTIVEC_ARG_RETURN;
30404 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
30407 /* Some return value types need to be split under the 32-bit ABI with -mpowerpc64. */
30408 if (TARGET_32BIT && TARGET_POWERPC64)
30417 int count = GET_MODE_SIZE (mode) / 4;
30418 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
30421 if ((INTEGRAL_TYPE_P (valtype)
30422 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
30423 || POINTER_TYPE_P (valtype))
30424 mode = TARGET_32BIT ? SImode : DImode;
30426 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
30427 /* _Decimal128 must use an even/odd register pair. */
30428 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
30429 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
30430 && !FLOAT128_VECTOR_P (mode))
30431 regno = FP_ARG_RETURN;
30432 else if (TREE_CODE (valtype) == COMPLEX_TYPE
30433 && targetm.calls.split_complex_arg)
30434 return rs6000_complex_function_value (mode);
30435 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
30436 return register is used in both cases, and we won't see V2DImode/V2DFmode
30437 for pure altivec, combine the two cases. */
30438 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
30439 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
30440 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
30441 regno = ALTIVEC_ARG_RETURN;
30443 regno = GP_ARG_RETURN;
30445 return gen_rtx_REG (mode, regno);
30448 /* Define how to find the value returned by a library function
30449 assuming the value has mode MODE. */
30451 rs6000_libcall_value (machine_mode mode)
30453 unsigned int regno;
30455 /* A long long return value needs to be split under the 32-bit ABI with -mpowerpc64. */
30456 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
30457 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
30459 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
30460 /* _Decimal128 must use an even/odd register pair. */
30461 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
30462 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
30463 regno = FP_ARG_RETURN;
30464 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
30465 return register is used in both cases, and we won't see V2DImode/V2DFmode
30466 for pure altivec, combine the two cases. */
30467 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
30468 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
30469 regno = ALTIVEC_ARG_RETURN;
30470 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
30471 return rs6000_complex_function_value (mode);
30473 regno = GP_ARG_RETURN;
30475 return gen_rtx_REG (mode, regno);
30478 /* Compute register pressure classes. We implement the target hook to avoid
30479 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
30480 lead to incorrect estimates of the number of available registers and therefore
30481 increased register pressure/spill. */
30483 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
30488 pressure_classes[n++] = GENERAL_REGS;
30490 pressure_classes[n++] = VSX_REGS;
30493 if (TARGET_ALTIVEC)
30494 pressure_classes[n++] = ALTIVEC_REGS;
30495 if (TARGET_HARD_FLOAT)
30496 pressure_classes[n++] = FLOAT_REGS;
30498 pressure_classes[n++] = CR_REGS;
30499 pressure_classes[n++] = SPECIAL_REGS;
30504 /* Given FROM and TO register numbers, say whether this elimination is allowed.
30505 Frame pointer elimination is automatically handled.
30507 For the RS/6000, if frame pointer elimination is being done, we would like
30508 to convert ap into fp, not sp.
30510 We need r30 if -mminimal-toc was specified, and there are constant pool references. */
30514 rs6000_can_eliminate (const int from, const int to)
30516 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
30517 ? ! frame_pointer_needed
30518 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
30519 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
30520 || constant_pool_empty_p ()
30524 /* Define the offset between two registers, FROM to be eliminated and its
30525 replacement TO, at the start of a routine. */
30527 rs6000_initial_elimination_offset (int from, int to)
30529 rs6000_stack_t *info = rs6000_stack_info ();
30530 HOST_WIDE_INT offset;
30532 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
30533 offset = info->push_p ? 0 : -info->total_size;
30534 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
30536 offset = info->push_p ? 0 : -info->total_size;
30537 if (FRAME_GROWS_DOWNWARD)
30538 offset += info->fixed_size + info->vars_size + info->parm_size;
30540 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
30541 offset = FRAME_GROWS_DOWNWARD
30542 ? info->fixed_size + info->vars_size + info->parm_size
30544 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
30545 offset = info->total_size;
30546 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
30547 offset = info->push_p ? info->total_size : 0;
30548 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
30551 gcc_unreachable ();
30556 /* Fill in the sizes of registers used by the unwinder. */
30559 rs6000_init_dwarf_reg_sizes_extra (tree address)
30561 if (TARGET_MACHO && ! TARGET_ALTIVEC)
30564 machine_mode mode = TYPE_MODE (char_type_node);
30565 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
30566 rtx mem = gen_rtx_MEM (BLKmode, addr);
30567 rtx value = gen_int_mode (16, mode);
30569 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
30570 The unwinder still needs to know the size of Altivec registers. */
30572 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
30574 int column = DWARF_REG_TO_UNWIND_COLUMN
30575 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
30576 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
30578 emit_move_insn (adjust_address (mem, mode, offset), value);
30583 /* Map internal gcc register numbers to debug format register numbers.
30584 FORMAT specifies the type of debug register number to use:
30585 0 -- debug information, except for frame-related sections
30586 1 -- DWARF .debug_frame section
30587 2 -- DWARF .eh_frame section */
30590 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
30592 /* On some platforms, we use the standard DWARF register
30593 numbering for .debug_info and .debug_frame. */
30594 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
30596 #ifdef RS6000_USE_DWARF_NUMBERING
30599 if (FP_REGNO_P (regno))
30600 return regno - FIRST_FPR_REGNO + 32;
30601 if (ALTIVEC_REGNO_P (regno))
30602 return regno - FIRST_ALTIVEC_REGNO + 1124;
30603 if (regno == LR_REGNO)
30605 if (regno == CTR_REGNO)
30607 if (regno == CA_REGNO)
30608 return 101; /* XER */
30609 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
30610 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
30611 The actual code emitted saves the whole of CR, so we map CR2_REGNO
30612 to the DWARF reg for CR. */
30613 if (format == 1 && regno == CR2_REGNO)
30615 if (CR_REGNO_P (regno))
30616 return regno - CR0_REGNO + 86;
30617 if (regno == VRSAVE_REGNO)
30619 if (regno == VSCR_REGNO)
30622 /* These do not make much sense. */
30623 if (regno == FRAME_POINTER_REGNUM)
30625 if (regno == ARG_POINTER_REGNUM)
30630 gcc_unreachable ();
30634 /* We use the GCC 7 (and before) internal number for non-DWARF debug
30635 information, and also for .eh_frame. */
30636 /* Translate the regnos to their numbers in GCC 7 (and before). */
30639 if (FP_REGNO_P (regno))
30640 return regno - FIRST_FPR_REGNO + 32;
30641 if (ALTIVEC_REGNO_P (regno))
30642 return regno - FIRST_ALTIVEC_REGNO + 77;
30643 if (regno == LR_REGNO)
30645 if (regno == CTR_REGNO)
30647 if (regno == CA_REGNO)
30648 return 76; /* XER */
30649 if (CR_REGNO_P (regno))
30650 return regno - CR0_REGNO + 68;
30651 if (regno == VRSAVE_REGNO)
30653 if (regno == VSCR_REGNO)
30656 if (regno == FRAME_POINTER_REGNUM)
30658 if (regno == ARG_POINTER_REGNUM)
30663 gcc_unreachable ();
30666 /* Target hook for eh_return_filter_mode. */
30667 static scalar_int_mode
30668 rs6000_eh_return_filter_mode (void)
30670 return TARGET_32BIT ? SImode : word_mode;
30673 /* Target hook for translate_mode_attribute. */
30674 static machine_mode
30675 rs6000_translate_mode_attribute (machine_mode mode)
30677 if ((FLOAT128_IEEE_P (mode)
30678 && ieee128_float_type_node == long_double_type_node)
30679 || (FLOAT128_IBM_P (mode)
30680 && ibm128_float_type_node == long_double_type_node))
30681 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
30685 /* Target hook for scalar_mode_supported_p. */
30687 rs6000_scalar_mode_supported_p (scalar_mode mode)
30689 /* -m32 does not support TImode. This is the default, from
30690 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
30691 same ABI as for -m32. But default_scalar_mode_supported_p allows
30692 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
30693 for -mpowerpc64. */
30694 if (TARGET_32BIT && mode == TImode)
30697 if (DECIMAL_FLOAT_MODE_P (mode))
30698 return default_decimal_float_supported_p ();
30699 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
30702 return default_scalar_mode_supported_p (mode);
30705 /* Target hook for vector_mode_supported_p. */
30707 rs6000_vector_mode_supported_p (machine_mode mode)
30709 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
30710 128-bit, the compiler might try to widen IEEE 128-bit to IBM double-double. */
30712 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
30719 /* Target hook for floatn_mode. */
30720 static opt_scalar_float_mode
30721 rs6000_floatn_mode (int n, bool extended)
30731 if (TARGET_FLOAT128_TYPE)
30732 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
30734 return opt_scalar_float_mode ();
30737 return opt_scalar_float_mode ();
30740 /* Those are the only valid _FloatNx types. */
30741 gcc_unreachable ();
30755 if (TARGET_FLOAT128_TYPE)
30756 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
30758 return opt_scalar_float_mode ();
30761 return opt_scalar_float_mode ();
30767 /* Target hook for c_mode_for_suffix. */
30768 static machine_mode
30769 rs6000_c_mode_for_suffix (char suffix)
30771 if (TARGET_FLOAT128_TYPE)
30773 if (suffix == 'q' || suffix == 'Q')
30774 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
30776 /* At the moment, we are not defining a suffix for IBM extended double.
30777 If/when the default for -mabi=ieeelongdouble is changed, and we want
30778 to support __ibm128 constants in legacy library code, we may need to
30779 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
30780 'q' as machine-dependent suffixes. The x86_64 port uses 'w' for
30781 __float80 constants. */
30787 /* Target hook for invalid_arg_for_unprototyped_fn. */
30788 static const char *
30789 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
30791 return (!rs6000_darwin64_abi
30793 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
30794 && (funcdecl == NULL_TREE
30795 || (TREE_CODE (funcdecl) == FUNCTION_DECL
30796 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
30797 ? N_("AltiVec argument passed to unprototyped function")
30801 /* For TARGET_SECURE_PLT 32-bit PIC code we can save the PIC register
30802 setup by using the __stack_chk_fail_local hidden function instead of
30803 calling __stack_chk_fail directly. Otherwise it is better to call
30804 __stack_chk_fail directly. */
30806 static tree ATTRIBUTE_UNUSED
30807 rs6000_stack_protect_fail (void)
30809 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
30810 ? default_hidden_stack_protect_fail ()
30811 : default_external_stack_protect_fail ();
30814 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30817 static unsigned HOST_WIDE_INT
30818 rs6000_asan_shadow_offset (void)
30820 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
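/* A hedged sketch (not compiled into GCC) of how this offset is used,
   assuming libsanitizer's usual 1/8th shadow mapping:
   shadow = (addr >> 3) + offset.  */
#if 0
static unsigned long long
asan_shadow_ref (unsigned long long addr, int is64)
{
  unsigned long long offset = 1ULL << (is64 ? 41 : 29);
  return (addr >> 3) + offset;
}
#endif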
30824 /* Mask options that we want to support inside of attribute((target)) and
30825 #pragma GCC target operations. Note, we do not include things like
30826 64/32-bit, endianness, hard/soft floating point, etc. that would have
30827 different calling sequences. */
30829 struct rs6000_opt_mask {
30830 const char *name; /* option name */
30831 HOST_WIDE_INT mask; /* mask to set */
30832 bool invert; /* invert sense of mask */
30833 bool valid_target; /* option is a target option */
30836 static struct rs6000_opt_mask const rs6000_opt_masks[] =
30838 { "altivec", OPTION_MASK_ALTIVEC, false, true },
30839 { "cmpb", OPTION_MASK_CMPB, false, true },
30840 { "crypto", OPTION_MASK_CRYPTO, false, true },
30841 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
30842 { "dlmzb", OPTION_MASK_DLMZB, false, true },
30843 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
30845 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
30846 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
30847 { "fprnd", OPTION_MASK_FPRND, false, true },
30848 { "future", OPTION_MASK_FUTURE, false, true },
30849 { "hard-dfp", OPTION_MASK_DFP, false, true },
30850 { "htm", OPTION_MASK_HTM, false, true },
30851 { "isel", OPTION_MASK_ISEL, false, true },
30852 { "mfcrf", OPTION_MASK_MFCRF, false, true },
30853 { "mfpgpr", 0, false, true },
30854 { "modulo", OPTION_MASK_MODULO, false, true },
30855 { "mulhw", OPTION_MASK_MULHW, false, true },
30856 { "multiple", OPTION_MASK_MULTIPLE, false, true },
30857 { "pcrel", OPTION_MASK_PCREL, false, true },
30858 { "popcntb", OPTION_MASK_POPCNTB, false, true },
30859 { "popcntd", OPTION_MASK_POPCNTD, false, true },
30860 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
30861 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
30862 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
30863 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
30864 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
30865 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
30866 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
30867 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
30868 { "prefixed-addr", OPTION_MASK_PREFIXED_ADDR, false, true },
30869 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
30870 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
30871 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
30872 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
30873 { "string", 0, false, true },
30874 { "update", OPTION_MASK_NO_UPDATE, true , true },
30875 { "vsx", OPTION_MASK_VSX, false, true },
30876 #ifdef OPTION_MASK_64BIT
30878 { "aix64", OPTION_MASK_64BIT, false, false },
30879 { "aix32", OPTION_MASK_64BIT, true, false },
30881 { "64", OPTION_MASK_64BIT, false, false },
30882 { "32", OPTION_MASK_64BIT, true, false },
30885 #ifdef OPTION_MASK_EABI
30886 { "eabi", OPTION_MASK_EABI, false, false },
30888 #ifdef OPTION_MASK_LITTLE_ENDIAN
30889 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
30890 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
30892 #ifdef OPTION_MASK_RELOCATABLE
30893 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
30895 #ifdef OPTION_MASK_STRICT_ALIGN
30896 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
30898 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
30899 { "string", 0, false, false },
30902 /* Builtin mask mapping for printing the flags. */
30903 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
30905 { "altivec", RS6000_BTM_ALTIVEC, false, false },
30906 { "vsx", RS6000_BTM_VSX, false, false },
30907 { "fre", RS6000_BTM_FRE, false, false },
30908 { "fres", RS6000_BTM_FRES, false, false },
30909 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
30910 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
30911 { "popcntd", RS6000_BTM_POPCNTD, false, false },
30912 { "cell", RS6000_BTM_CELL, false, false },
30913 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
30914 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
30915 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
30916 { "crypto", RS6000_BTM_CRYPTO, false, false },
30917 { "htm", RS6000_BTM_HTM, false, false },
30918 { "hard-dfp", RS6000_BTM_DFP, false, false },
30919 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
30920 { "long-double-128", RS6000_BTM_LDBL128, false, false },
30921 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
30922 { "float128", RS6000_BTM_FLOAT128, false, false },
30923 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
30926 /* Option variables that we want to support inside attribute((target)) and
30927 #pragma GCC target operations. */
30929 struct rs6000_opt_var {
30930 const char *name; /* option name */
30931 size_t global_offset; /* offset of the option in global_options. */
30932 size_t target_offset; /* offset of the option in target options. */
30935 static struct rs6000_opt_var const rs6000_opt_vars[] =
30938 offsetof (struct gcc_options, x_TARGET_FRIZ),
30939 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
30940 { "avoid-indexed-addresses",
30941 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
30942 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
30944 offsetof (struct gcc_options, x_rs6000_default_long_calls),
30945 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
30946 { "optimize-swaps",
30947 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
30948 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
30949 { "allow-movmisalign",
30950 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
30951 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
30953 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
30954 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
30956 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
30957 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
30958 { "align-branch-targets",
30959 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
30960 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
30962 offsetof (struct gcc_options, x_tls_markers),
30963 offsetof (struct cl_target_option, x_tls_markers), },
30965 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
30966 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
30968 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
30969 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
30970 { "speculate-indirect-jumps",
30971 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
30972 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
30975 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
30976 parsing. Return true if there were no errors. */
30979 rs6000_inner_target_options (tree args, bool attr_p)
30983 if (args == NULL_TREE)
30986 else if (TREE_CODE (args) == STRING_CST)
30988 char *p = ASTRDUP (TREE_STRING_POINTER (args));
30991 while ((q = strtok (p, ",")) != NULL)
30993 bool error_p = false;
30994 bool not_valid_p = false;
30995 const char *cpu_opt = NULL;
30998 if (strncmp (q, "cpu=", 4) == 0)
31000 int cpu_index = rs6000_cpu_name_lookup (q+4);
31001 if (cpu_index >= 0)
31002 rs6000_cpu_index = cpu_index;
31009 else if (strncmp (q, "tune=", 5) == 0)
31011 int tune_index = rs6000_cpu_name_lookup (q+5);
31012 if (tune_index >= 0)
31013 rs6000_tune_index = tune_index;
31023 bool invert = false;
31027 if (strncmp (r, "no-", 3) == 0)
31033 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
31034 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
31036 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
31038 if (!rs6000_opt_masks[i].valid_target)
31039 not_valid_p = true;
31043 rs6000_isa_flags_explicit |= mask;
31045 /* VSX needs altivec, so -mvsx automagically sets
31046 altivec and disables -mavoid-indexed-addresses. */
31049 if (mask == OPTION_MASK_VSX)
31051 mask |= OPTION_MASK_ALTIVEC;
31052 TARGET_AVOID_XFORM = 0;
31056 if (rs6000_opt_masks[i].invert)
31060 rs6000_isa_flags &= ~mask;
31062 rs6000_isa_flags |= mask;
31067 if (error_p && !not_valid_p)
31069 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
31070 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
31072 size_t j = rs6000_opt_vars[i].global_offset;
31073 *((int *) ((char *)&global_options + j)) = !invert;
31075 not_valid_p = false;
31083 const char *eprefix, *esuffix;
31088 eprefix = "__attribute__((__target__(";
31093 eprefix = "#pragma GCC target ";
31098 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
31100 else if (not_valid_p)
31101 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
31103 error ("%s%qs%s is invalid", eprefix, q, esuffix);
31108 else if (TREE_CODE (args) == TREE_LIST)
31112 tree value = TREE_VALUE (args);
31115 bool ret2 = rs6000_inner_target_options (value, attr_p);
31119 args = TREE_CHAIN (args);
31121 while (args != NULL_TREE);
31126 error ("attribute %<target%> argument not a string");
31133 /* Print out the target options as a list for -mdebug=target. */
31136 rs6000_debug_target_options (tree args, const char *prefix)
31138 if (args == NULL_TREE)
31139 fprintf (stderr, "%s<NULL>", prefix);
31141 else if (TREE_CODE (args) == STRING_CST)
31143 char *p = ASTRDUP (TREE_STRING_POINTER (args));
31146 while ((q = strtok (p, ",")) != NULL)
31149 fprintf (stderr, "%s\"%s\"", prefix, q);
31154 else if (TREE_CODE (args) == TREE_LIST)
31158 tree value = TREE_VALUE (args);
31161 rs6000_debug_target_options (value, prefix);
31164 args = TREE_CHAIN (args);
31166 while (args != NULL_TREE);
31170 gcc_unreachable ();
31176 /* Hook to validate attribute((target("..."))). */
31179 rs6000_valid_attribute_p (tree fndecl,
31180 tree ARG_UNUSED (name),
31184 struct cl_target_option cur_target;
31187 tree new_target, new_optimize;
31188 tree func_optimize;
31190 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
31192 if (TARGET_DEBUG_TARGET)
31194 tree tname = DECL_NAME (fndecl);
31195 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
31197 fprintf (stderr, "function: %.*s\n",
31198 (int) IDENTIFIER_LENGTH (tname),
31199 IDENTIFIER_POINTER (tname));
31201 fprintf (stderr, "function: unknown\n");
31203 fprintf (stderr, "args:");
31204 rs6000_debug_target_options (args, " ");
31205 fprintf (stderr, "\n");
31208 fprintf (stderr, "flags: 0x%x\n", flags);
31210 fprintf (stderr, "--------------------\n");
31213 /* attribute((target("default"))) does nothing, beyond
31214 affecting multi-versioning. */
31215 if (TREE_VALUE (args)
31216 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
31217 && TREE_CHAIN (args) == NULL_TREE
31218 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
31221 old_optimize = build_optimization_node (&global_options);
31222 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
31224 /* If the function changed the optimization levels as well as setting target
31225 options, start with the optimizations specified. */
31226 if (func_optimize && func_optimize != old_optimize)
31227 cl_optimization_restore (&global_options,
31228 TREE_OPTIMIZATION (func_optimize));
31230 /* The target attributes may also change some optimization flags, so update
31231 the optimization options if necessary. */
31232 cl_target_option_save (&cur_target, &global_options);
31233 rs6000_cpu_index = rs6000_tune_index = -1;
31234 ret = rs6000_inner_target_options (args, true);
31236 /* Set up any additional state. */
31239 ret = rs6000_option_override_internal (false);
31240 new_target = build_target_option_node (&global_options);
31245 new_optimize = build_optimization_node (&global_options);
31252 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
31254 if (old_optimize != new_optimize)
31255 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
31258 cl_target_option_restore (&global_options, &cur_target);
31260 if (old_optimize != new_optimize)
31261 cl_optimization_restore (&global_options,
31262 TREE_OPTIMIZATION (old_optimize));
31268 /* Hook to validate the current #pragma GCC target and set the state, and
31269 update the macros based on what was changed. If ARGS is NULL, then
31270 POP_TARGET is used to reset the options. */
31273 rs6000_pragma_target_parse (tree args, tree pop_target)
31275 tree prev_tree = build_target_option_node (&global_options);
31277 struct cl_target_option *prev_opt, *cur_opt;
31278 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
31279 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
31281 if (TARGET_DEBUG_TARGET)
31283 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
31284 fprintf (stderr, "args:");
31285 rs6000_debug_target_options (args, " ");
31286 fprintf (stderr, "\n");
31290 fprintf (stderr, "pop_target:\n");
31291 debug_tree (pop_target);
31294 fprintf (stderr, "pop_target: <NULL>\n");
31296 fprintf (stderr, "--------------------\n");
31301 cur_tree = ((pop_target)
31303 : target_option_default_node);
31304 cl_target_option_restore (&global_options,
31305 TREE_TARGET_OPTION (cur_tree));
31309 rs6000_cpu_index = rs6000_tune_index = -1;
31310 if (!rs6000_inner_target_options (args, false)
31311 || !rs6000_option_override_internal (false)
31312 || (cur_tree = build_target_option_node (&global_options))
31315 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
31316 fprintf (stderr, "invalid pragma\n");
31322 target_option_current_node = cur_tree;
31323 rs6000_activate_target_options (target_option_current_node);
31325 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
31326 change the macros that are defined. */
31327 if (rs6000_target_modify_macros_ptr)
31329 prev_opt = TREE_TARGET_OPTION (prev_tree);
31330 prev_bumask = prev_opt->x_rs6000_builtin_mask;
31331 prev_flags = prev_opt->x_rs6000_isa_flags;
31333 cur_opt = TREE_TARGET_OPTION (cur_tree);
31334 cur_flags = cur_opt->x_rs6000_isa_flags;
31335 cur_bumask = cur_opt->x_rs6000_builtin_mask;
31337 diff_bumask = (prev_bumask ^ cur_bumask);
31338 diff_flags = (prev_flags ^ cur_flags);
31340 if ((diff_flags != 0) || (diff_bumask != 0))
31342 /* Delete old macros. */
31343 rs6000_target_modify_macros_ptr (false,
31344 prev_flags & diff_flags,
31345 prev_bumask & diff_bumask);
31347 /* Define new macros. */
31348 rs6000_target_modify_macros_ptr (true,
31349 cur_flags & diff_flags,
31350 cur_bumask & diff_bumask);
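/* Example (a sketch, not part of the original code): switching between

       #pragma GCC target ("vsx")
       #pragma GCC target ("no-vsx")

   puts OPTION_MASK_VSX into diff_flags, so the first call above removes
   macros such as __VSX__ that only the old state defined, and the
   second call defines the ones new to the current state.  */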
31358 /* Remember the last target of rs6000_set_current_function. */
31359 static GTY(()) tree rs6000_previous_fndecl;
31361 /* Restore target's globals from NEW_TREE and invalidate the
31362 rs6000_previous_fndecl cache. */
31365 rs6000_activate_target_options (tree new_tree)
31367 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
31368 if (TREE_TARGET_GLOBALS (new_tree))
31369 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
31370 else if (new_tree == target_option_default_node)
31371 restore_target_globals (&default_target_globals);
31373 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
31374 rs6000_previous_fndecl = NULL_TREE;
31377 /* Establish appropriate back-end context for processing the function
31378 FNDECL. The argument might be NULL to indicate processing at top
31379 level, outside of any function scope. */
31381 rs6000_set_current_function (tree fndecl)
31383 if (TARGET_DEBUG_TARGET)
31385 fprintf (stderr, "\n==================== rs6000_set_current_function");
31388 fprintf (stderr, ", fndecl %s (%p)",
31389 (DECL_NAME (fndecl)
31390 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
31391 : "<unknown>"), (void *)fndecl);
31393 if (rs6000_previous_fndecl)
31394 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
31396 fprintf (stderr, "\n");
31399 /* Only change the context if the function changes. This hook is called
31400 several times in the course of compiling a function, and we don't want to
31401 slow things down too much or call target_reinit when it isn't safe. */
31402 if (fndecl == rs6000_previous_fndecl)
31406 if (rs6000_previous_fndecl == NULL_TREE)
31407 old_tree = target_option_current_node;
31408 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
31409 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
31411 old_tree = target_option_default_node;
31414 if (fndecl == NULL_TREE)
31416 if (old_tree != target_option_current_node)
31417 new_tree = target_option_current_node;
31419 new_tree = NULL_TREE;
31423 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31424 if (new_tree == NULL_TREE)
31425 new_tree = target_option_default_node;
31428 if (TARGET_DEBUG_TARGET)
31432 fprintf (stderr, "\nnew fndecl target specific options:\n");
31433 debug_tree (new_tree);
31438 fprintf (stderr, "\nold fndecl target specific options:\n");
31439 debug_tree (old_tree);
31442 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
31443 fprintf (stderr, "--------------------\n");
31446 if (new_tree && old_tree != new_tree)
31447 rs6000_activate_target_options (new_tree);
31450 rs6000_previous_fndecl = fndecl;
31454 /* Save the current options */
31457 rs6000_function_specific_save (struct cl_target_option *ptr,
31458 struct gcc_options *opts)
31460 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
31461 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
31464 /* Restore the current options */
31467 rs6000_function_specific_restore (struct gcc_options *opts,
31468 struct cl_target_option *ptr)
31471 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
31472 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
31473 (void) rs6000_option_override_internal (false);
31476 /* Print the current options */
31479 rs6000_function_specific_print (FILE *file, int indent,
31480 struct cl_target_option *ptr)
31482 rs6000_print_isa_options (file, indent, "Isa options set",
31483 ptr->x_rs6000_isa_flags);
31485 rs6000_print_isa_options (file, indent, "Isa options explicit",
31486 ptr->x_rs6000_isa_flags_explicit);
31489 /* Helper function to print the current isa or misc options on a line. */
31492 rs6000_print_options_internal (FILE *file,
31494 const char *string,
31495 HOST_WIDE_INT flags,
31496 const char *prefix,
31497 const struct rs6000_opt_mask *opts,
31498 size_t num_elements)
31501 size_t start_column = 0;
31503 size_t max_column = 120;
31504 size_t prefix_len = strlen (prefix);
31505 size_t comma_len = 0;
31506 const char *comma = "";
31509 start_column += fprintf (file, "%*s", indent, "");
31513 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
31517 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
31519 /* Print the various mask options. */
31520 cur_column = start_column;
31521 for (i = 0; i < num_elements; i++)
31523 bool invert = opts[i].invert;
31524 const char *name = opts[i].name;
31525 const char *no_str = "";
31526 HOST_WIDE_INT mask = opts[i].mask;
31527 size_t len = comma_len + prefix_len + strlen (name);
31531 if ((flags & mask) == 0)
31534 len += sizeof ("no-") - 1;
31542 if ((flags & mask) != 0)
31545 len += sizeof ("no-") - 1;
31552 if (cur_column > max_column)
31554 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
31555 cur_column = start_column + len;
31559 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
31561 comma_len = sizeof (", ") - 1;
31564 fputs ("\n", file);
31567 /* Helper function to print the current isa options on a line. */
31570 rs6000_print_isa_options (FILE *file, int indent, const char *string,
31571 HOST_WIDE_INT flags)
31573 rs6000_print_options_internal (file, indent, string, flags, "-m",
31574 &rs6000_opt_masks[0],
31575 ARRAY_SIZE (rs6000_opt_masks));
31579 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
31580 HOST_WIDE_INT flags)
31582 rs6000_print_options_internal (file, indent, string, flags, "",
31583 &rs6000_builtin_mask_names[0],
31584 ARRAY_SIZE (rs6000_builtin_mask_names));
31587 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
31588 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
31589 -mupper-regs-df, etc.).
31591 If the user used -mno-power8-vector, we need to turn off all of the implicit
31592 ISA 2.07 and 3.0 options that relate to the vector unit.
31594 If the user used -mno-power9-vector, we need to turn off all of the implicit
31595 ISA 3.0 options that relate to the vector unit.
31597 This function does not handle explicit options such as the user specifying
31598 -mdirect-move. These are handled in rs6000_option_override_internal, and
31599 the appropriate error is given if needed.
31601 We return a mask of all of the implicit options that should not be enabled by default. */
31604 static HOST_WIDE_INT
31605 rs6000_disable_incompatible_switches (void)
31607 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
31610 static const struct {
31611 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
31612 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
31613 const char *const name; /* name of the switch. */
31615 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
31616 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
31617 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
31618 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
31621 for (i = 0; i < ARRAY_SIZE (flags); i++)
31623 HOST_WIDE_INT no_flag = flags[i].no_flag;
31625 if ((rs6000_isa_flags & no_flag) == 0
31626 && (rs6000_isa_flags_explicit & no_flag) != 0)
31628 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
31629 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
31635 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
31636 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
31638 set_flags &= ~rs6000_opt_masks[j].mask;
31639 error ("%<-mno-%s%> turns off %<-m%s%>",
31641 rs6000_opt_masks[j].name);
31644 gcc_assert (!set_flags);
31647 rs6000_isa_flags &= ~dep_flags;
31648 ignore_masks |= no_flag | dep_flags;
31652 return ignore_masks;
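/* For illustration (an assumed invocation, not original text): with
   "-mno-vsx -mpower8-vector" the loop above reports
       -mno-vsx turns off -mpower8-vector
   because the dependent flag was requested explicitly, whereas a
   power8-vector flag implied only by -mcpu=power8 is silently cleared
   from rs6000_isa_flags instead.  */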
31656 /* Helper function for printing the function name when debugging. */
31658 static const char *
31659 get_decl_name (tree fn)
31666 name = DECL_NAME (fn);
31668 return "<no-name>";
31670 return IDENTIFIER_POINTER (name);
31673 /* Return the clone id of the target we are compiling code for in a target
31674 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
31675 the priority list for the target clones (ordered from lowest to highest priority). */
31679 rs6000_clone_priority (tree fndecl)
31681 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31682 HOST_WIDE_INT isa_masks;
31683 int ret = CLONE_DEFAULT;
31684 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
31685 const char *attrs_str = NULL;
31687 attrs = TREE_VALUE (TREE_VALUE (attrs));
31688 attrs_str = TREE_STRING_POINTER (attrs);
31690 /* Return priority zero for default function. Return the ISA needed for the
31691 function if it is not the default. */
31692 if (strcmp (attrs_str, "default") != 0)
31694 if (fn_opts == NULL_TREE)
31695 fn_opts = target_option_default_node;
31697 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
31698 isa_masks = rs6000_isa_flags;
31700 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
31702 for (ret = CLONE_MAX - 1; ret != 0; ret--)
31703 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
31707 if (TARGET_DEBUG_TARGET)
31708 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
31709 get_decl_name (fndecl), ret);
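/* Usage sketch (illustrative, not from the original sources):

       __attribute__ ((target_clones ("cpu=power9,cpu=power8,default")))
       long mul (long a, long b) { return a * b; }

   Each clone's priority comes from matching rs6000_clone_map in the
   loop above; the "default" version always gets CLONE_DEFAULT (0).  */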
31714 /* This compares the priority of target features in function DECL1 and DECL2.
31715 It returns positive value if DECL1 is higher priority, negative value if
31716 DECL2 is higher priority and 0 if they are the same. Note, priorities are
31717 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
31720 rs6000_compare_version_priority (tree decl1, tree decl2)
31722 int priority1 = rs6000_clone_priority (decl1);
31723 int priority2 = rs6000_clone_priority (decl2);
31724 int ret = priority1 - priority2;
31726 if (TARGET_DEBUG_TARGET)
31727 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
31728 get_decl_name (decl1), get_decl_name (decl2), ret);
31733 /* Make a dispatcher declaration for the multi-versioned function DECL.
31734 Calls to DECL function will be replaced with calls to the dispatcher
31735 by the front-end. Returns the decl of the dispatcher function. */
31738 rs6000_get_function_versions_dispatcher (void *decl)
31740 tree fn = (tree) decl;
31741 struct cgraph_node *node = NULL;
31742 struct cgraph_node *default_node = NULL;
31743 struct cgraph_function_version_info *node_v = NULL;
31744 struct cgraph_function_version_info *first_v = NULL;
31746 tree dispatch_decl = NULL;
31748 struct cgraph_function_version_info *default_version_info = NULL;
31749 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31751 if (TARGET_DEBUG_TARGET)
31752 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
31753 get_decl_name (fn));
31755 node = cgraph_node::get (fn);
31756 gcc_assert (node != NULL);
31758 node_v = node->function_version ();
31759 gcc_assert (node_v != NULL);
31761 if (node_v->dispatcher_resolver != NULL)
31762 return node_v->dispatcher_resolver;
31764 /* Find the default version and make it the first node. */
31766 /* Go to the beginning of the chain. */
31767 while (first_v->prev != NULL)
31768 first_v = first_v->prev;
31770 default_version_info = first_v;
31771 while (default_version_info != NULL)
31773 const tree decl2 = default_version_info->this_node->decl;
31774 if (is_function_default_version (decl2))
31776 default_version_info = default_version_info->next;
31779 /* If there is no default node, just return NULL. */
31780 if (default_version_info == NULL)
31783 /* Make default info the first node. */
31784 if (first_v != default_version_info)
31786 default_version_info->prev->next = default_version_info->next;
31787 if (default_version_info->next)
31788 default_version_info->next->prev = default_version_info->prev;
31789 first_v->prev = default_version_info;
31790 default_version_info->next = first_v;
31791 default_version_info->prev = NULL;
31794 default_node = default_version_info->this_node;
31796 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
31797 error_at (DECL_SOURCE_LOCATION (default_node->decl),
31798 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
31799 "exports hardware capability bits");
31802 if (targetm.has_ifunc_p ())
31804 struct cgraph_function_version_info *it_v = NULL;
31805 struct cgraph_node *dispatcher_node = NULL;
31806 struct cgraph_function_version_info *dispatcher_version_info = NULL;
31808 /* Right now, the dispatching is done via ifunc. */
31809 dispatch_decl = make_dispatcher_decl (default_node->decl);
31811 dispatcher_node = cgraph_node::get_create (dispatch_decl);
31812 gcc_assert (dispatcher_node != NULL);
31813 dispatcher_node->dispatcher_function = 1;
31814 dispatcher_version_info
31815 = dispatcher_node->insert_new_function_version ();
31816 dispatcher_version_info->next = default_version_info;
31817 dispatcher_node->definition = 1;
31819 /* Set the dispatcher for all the versions. */
31820 it_v = default_version_info;
31821 while (it_v != NULL)
31823 it_v->dispatcher_resolver = dispatch_decl;
31829 error_at (DECL_SOURCE_LOCATION (default_node->decl),
31830 "multiversioning needs ifunc which is not supported "
31835 return dispatch_decl;
31838 /* Make the resolver function decl to dispatch the versions of a multi-
31839 versioned function, DEFAULT_DECL. Create an empty basic block in the
31840 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
31844 make_resolver_func (const tree default_decl,
31845 const tree dispatch_decl,
31846 basic_block *empty_bb)
31848 /* Make the resolver function static. The resolver function returns void *. */
31850 tree decl_name = clone_function_name (default_decl, "resolver");
31851 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
31852 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
31853 tree decl = build_fn_decl (resolver_name, type);
31854 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
31856 DECL_NAME (decl) = decl_name;
31857 TREE_USED (decl) = 1;
31858 DECL_ARTIFICIAL (decl) = 1;
31859 DECL_IGNORED_P (decl) = 0;
31860 TREE_PUBLIC (decl) = 0;
31861 DECL_UNINLINABLE (decl) = 1;
31863 /* Resolver is not external, body is generated. */
31864 DECL_EXTERNAL (decl) = 0;
31865 DECL_EXTERNAL (dispatch_decl) = 0;
31867 DECL_CONTEXT (decl) = NULL_TREE;
31868 DECL_INITIAL (decl) = make_node (BLOCK);
31869 DECL_STATIC_CONSTRUCTOR (decl) = 0;
31871 /* Build result decl and add to function_decl. */
31872 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
31873 DECL_CONTEXT (t) = decl;
31874 DECL_ARTIFICIAL (t) = 1;
31875 DECL_IGNORED_P (t) = 1;
31876 DECL_RESULT (decl) = t;
31878 gimplify_function_tree (decl);
31879 push_cfun (DECL_STRUCT_FUNCTION (decl));
31880 *empty_bb = init_lowered_empty_function (decl, false,
31881 profile_count::uninitialized ());
31883 cgraph_node::add_new_function (decl, true);
31884 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
31888 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
31889 DECL_ATTRIBUTES (dispatch_decl)
31890 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
31892 cgraph_node::create_same_body_alias (dispatch_decl, decl);
31897 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
31898 return a pointer to VERSION_DECL if we are running on a machine that
31899 supports the index CLONE_ISA hardware architecture bits. This function will
31900 be called during version dispatch to decide which function version to
31901 execute. It returns the basic block at the end, to which more conditions can be added. */
31905 add_condition_to_bb (tree function_decl, tree version_decl,
31906 int clone_isa, basic_block new_bb)
31908 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31910 gcc_assert (new_bb != NULL);
31911 gimple_seq gseq = bb_seq (new_bb);
31914 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31915 build_fold_addr_expr (version_decl));
31916 tree result_var = create_tmp_var (ptr_type_node);
31917 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
31918 gimple *return_stmt = gimple_build_return (result_var);
31920 if (clone_isa == CLONE_DEFAULT)
31922 gimple_seq_add_stmt (&gseq, convert_stmt);
31923 gimple_seq_add_stmt (&gseq, return_stmt);
31924 set_bb_seq (new_bb, gseq);
31925 gimple_set_bb (convert_stmt, new_bb);
31926 gimple_set_bb (return_stmt, new_bb);
31931 tree bool_zero = build_int_cst (bool_int_type_node, 0);
31932 tree cond_var = create_tmp_var (bool_int_type_node);
31933 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
31934 const char *arg_str = rs6000_clone_map[clone_isa].name;
31935 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31936 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31937 gimple_call_set_lhs (call_cond_stmt, cond_var);
31939 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31940 gimple_set_bb (call_cond_stmt, new_bb);
31941 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31943 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
31944 NULL_TREE, NULL_TREE);
31945 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31946 gimple_set_bb (if_else_stmt, new_bb);
31947 gimple_seq_add_stmt (&gseq, if_else_stmt);
31949 gimple_seq_add_stmt (&gseq, convert_stmt);
31950 gimple_seq_add_stmt (&gseq, return_stmt);
31951 set_bb_seq (new_bb, gseq);
31953 basic_block bb1 = new_bb;
31954 edge e12 = split_block (bb1, if_else_stmt);
31955 basic_block bb2 = e12->dest;
31956 e12->flags &= ~EDGE_FALLTHRU;
31957 e12->flags |= EDGE_TRUE_VALUE;
31959 edge e23 = split_block (bb2, return_stmt);
31960 gimple_set_bb (convert_stmt, bb2);
31961 gimple_set_bb (return_stmt, bb2);
31963 basic_block bb3 = e23->dest;
31964 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31967 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
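/* Sketch (not from the original sources; clone and function names are
   illustrative) of the control flow that add_condition_to_bb builds up,
   expressed as C:

       void *resolver (void)
       {
	 if (__builtin_cpu_supports ("arch_3_00"))   -- e.g. an ISA 3.0 clone
	   return f_power9;
	 ...
	 return f_default;   -- CLONE_DEFAULT emits just the return
       }

   Each non-default clone contributes one __builtin_cpu_supports test,
   and the default clone supplies the final unconditional return.  */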
31973 /* This function generates the dispatch function for multi-versioned functions.
31974 DISPATCH_DECL is the function which will contain the dispatch logic.
31975 FNDECLS are the function choices for dispatch, and is a tree chain.
31976 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
31977 code is generated. */
31980 dispatch_function_versions (tree dispatch_decl,
31982 basic_block *empty_bb)
31986 vec<tree> *fndecls;
31987 tree clones[CLONE_MAX];
31989 if (TARGET_DEBUG_TARGET)
31990 fputs ("dispatch_function_versions, top\n", stderr);
31992 gcc_assert (dispatch_decl != NULL
31993 && fndecls_p != NULL
31994 && empty_bb != NULL);
31996 /* fndecls_p is actually a vector. */
31997 fndecls = static_cast<vec<tree> *> (fndecls_p);
31999 /* At least one more version other than the default. */
32000 gcc_assert (fndecls->length () >= 2);
32002 /* The first version in the vector is the default decl. */
32003 memset ((void *) clones, '\0', sizeof (clones));
32004 clones[CLONE_DEFAULT] = (*fndecls)[0];
32006 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
32007 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
32008 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
32009 recent glibc. If we ever need to call __builtin_cpu_init, we would need
32010 to insert the code here to do the call. */
32012 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
32014 int priority = rs6000_clone_priority (ele);
32015 if (!clones[priority])
32016 clones[priority] = ele;
32019 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
32022 if (TARGET_DEBUG_TARGET)
32023 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
32024 ix, get_decl_name (clones[ix]));
32026 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
32033 /* Generate the dispatching code body to dispatch multi-versioned function
32034 DECL. The target hook is called to process the "target" attributes and
32035 provide the code to dispatch the right function at run-time. NODE points
32036 to the dispatcher decl whose body will be created. */
32039 rs6000_generate_version_dispatcher_body (void *node_p)
32042 basic_block empty_bb;
32043 struct cgraph_node *node = (cgraph_node *) node_p;
32044 struct cgraph_function_version_info *ninfo = node->function_version ();
32046 if (ninfo->dispatcher_resolver)
32047 return ninfo->dispatcher_resolver;
32049 /* node is going to be an alias, so remove the finalized bit. */
32050 node->definition = false;
32052 /* The first version in the chain corresponds to the default version. */
32053 ninfo->dispatcher_resolver = resolver
32054 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
32056 if (TARGET_DEBUG_TARGET)
32057 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
32058 get_decl_name (resolver));
32060 push_cfun (DECL_STRUCT_FUNCTION (resolver));
32061 auto_vec<tree, 2> fn_ver_vec;
32063 for (struct cgraph_function_version_info *vinfo = ninfo->next;
32065 vinfo = vinfo->next)
32067 struct cgraph_node *version = vinfo->this_node;
32068 /* Check for virtual functions here again, as by this time it should
32069 have been determined if this function needs a vtable index or
32070 not. This happens for methods in derived classes that override
32071 virtual methods in base classes but are not explicitly marked as virtual. */
32073 if (DECL_VINDEX (version->decl))
32074 sorry ("Virtual function multiversioning not supported");
32076 fn_ver_vec.safe_push (version->decl);
32079 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
32080 cgraph_edge::rebuild_edges ();
32086 /* Hook to determine if one function can safely inline another. */
32089 rs6000_can_inline_p (tree caller, tree callee)
32092 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32093 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32095 /* If callee has no option attributes, then it is ok to inline. */
32099 /* If caller has no option attributes, but callee does, then it is not ok to inline. */
32101 else if (!caller_tree)
32106 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32107 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32109 /* Callee's options should be a subset of the caller's, i.e. a vsx function
32110 can inline an altivec function but a non-vsx function can't inline a vsx function. */
32112 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32113 == callee_opts->x_rs6000_isa_flags)
32117 if (TARGET_DEBUG_TARGET)
32118 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
32119 get_decl_name (caller), get_decl_name (callee),
32120 (ret ? "can" : "cannot"));
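/* Example (illustrative): a caller built with -mcpu=power9, whose flags
   include both VSX and Altivec, may inline a callee marked
   __attribute__ ((target ("altivec"))); a caller without VSX in its
   flags may not inline a callee marked target ("vsx").  */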
32125 /* Allocate a stack temp and fixup the address so it meets the particular
32126 memory requirements (either offsettable or REG+REG addressing). */
32129 rs6000_allocate_stack_temp (machine_mode mode,
32130 bool offsettable_p,
32133 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32134 rtx addr = XEXP (stack, 0);
32135 int strict_p = reload_completed;
32137 if (!legitimate_indirect_address_p (addr, strict_p))
32140 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32141 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32143 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32144 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32150 /* Given a memory reference, if it is not a reg or reg+reg addressing,
32151 convert to such a form to deal with memory reference instructions
32152 like STFIWX and LDBRX that only take reg+reg addressing. */
32155 rs6000_force_indexed_or_indirect_mem (rtx x)
32157 machine_mode mode = GET_MODE (x);
32159 gcc_assert (MEM_P (x));
32160 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
32162 rtx addr = XEXP (x, 0);
32163 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
32165 rtx reg = XEXP (addr, 0);
32166 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
32167 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
32168 gcc_assert (REG_P (reg));
32169 emit_insn (gen_add3_insn (reg, reg, size_rtx));
32172 else if (GET_CODE (addr) == PRE_MODIFY)
32174 rtx reg = XEXP (addr, 0);
32175 rtx expr = XEXP (addr, 1);
32176 gcc_assert (REG_P (reg));
32177 gcc_assert (GET_CODE (expr) == PLUS);
32178 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
32182 x = replace_equiv_address (x, force_reg (Pmode, addr));
32188 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
32190 On the RS/6000, all integer constants are acceptable, most won't be valid
32191 for particular insns, though. Only easy FP constants are acceptable. */
32194 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
32196 if (TARGET_ELF && tls_referenced_p (x))
32199 if (CONST_DOUBLE_P (x))
32200 return easy_fp_constant (x, mode);
32202 if (GET_CODE (x) == CONST_VECTOR)
32203 return easy_vector_constant (x, mode);
32209 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
32212 chain_already_loaded (rtx_insn *last)
32214 for (; last != NULL; last = PREV_INSN (last))
32216 if (NONJUMP_INSN_P (last))
32218 rtx patt = PATTERN (last);
32220 if (GET_CODE (patt) == SET)
32222 rtx lhs = XEXP (patt, 0);
32224 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
32232 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
32235 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32237 rtx func = func_desc;
32238 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
32239 rtx toc_load = NULL_RTX;
32240 rtx toc_restore = NULL_RTX;
32242 rtx abi_reg = NULL_RTX;
32246 bool is_pltseq_longcall;
32249 tlsarg = global_tlsarg;
32251 /* Handle longcall attributes. */
32252 is_pltseq_longcall = false;
32253 if ((INTVAL (cookie) & CALL_LONG) != 0
32254 && GET_CODE (func_desc) == SYMBOL_REF)
32256 func = rs6000_longcall_ref (func_desc, tlsarg);
32258 is_pltseq_longcall = true;
32261 /* Handle indirect calls. */
32262 if (!SYMBOL_REF_P (func)
32263 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
32265 if (!rs6000_pcrel_p (cfun))
32267 /* Save the TOC into its reserved slot before the call,
32268 and prepare to restore it after the call. */
32269 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
32270 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
32271 gen_rtvec (1, stack_toc_offset),
32273 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
32275 /* Can we optimize saving the TOC in the prologue or
32276 do we need to do it at every call? */
32277 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
32278 cfun->machine->save_toc_in_prologue = true;
32281 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32282 rtx stack_toc_mem = gen_frame_mem (Pmode,
32283 gen_rtx_PLUS (Pmode, stack_ptr,
32284 stack_toc_offset));
32285 MEM_VOLATILE_P (stack_toc_mem) = 1;
32286 if (is_pltseq_longcall)
32288 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
32289 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32290 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
32293 emit_move_insn (stack_toc_mem, toc_reg);
32297 if (DEFAULT_ABI == ABI_ELFv2)
32299 /* A function pointer in the ELFv2 ABI is just a plain address, but
32300 the ABI requires it to be loaded into r12 before the call. */
32301 func_addr = gen_rtx_REG (Pmode, 12);
32302 if (!rtx_equal_p (func_addr, func))
32303 emit_move_insn (func_addr, func);
32304 abi_reg = func_addr;
32305 /* Indirect calls via CTR are strongly preferred over indirect
32306 calls via LR, so move the address there. Needed to mark
32307 this insn for linker plt sequence editing too. */
32308 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32309 if (is_pltseq_longcall)
32311 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
32312 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32313 emit_insn (gen_rtx_SET (func_addr, mark_func));
32314 v = gen_rtvec (2, func_addr, func_desc);
32315 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32318 emit_move_insn (func_addr, abi_reg);
32322 /* A function pointer under AIX is a pointer to a data area whose
32323 first word contains the actual address of the function, whose
32324 second word contains a pointer to its TOC, and whose third word
32325 contains a value to place in the static chain register (r11).
32326 Note that if we load the static chain, our "trampoline" need
32327 not have any executable code. */
32329 /* Load up address of the actual function. */
32330 func = force_reg (Pmode, func);
32331 func_addr = gen_reg_rtx (Pmode);
32332 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
32334 /* Indirect calls via CTR are strongly preferred over indirect
32335 calls via LR, so move the address there. */
32336 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
32337 emit_move_insn (ctr_reg, func_addr);
32338 func_addr = ctr_reg;
32340 /* Prepare to load the TOC of the called function. Note that the
32341 TOC load must happen immediately before the actual call so
32342 that unwinding the TOC registers works correctly. See the
32343 comment in frob_update_context. */
32344 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
32345 rtx func_toc_mem = gen_rtx_MEM (Pmode,
32346 gen_rtx_PLUS (Pmode, func,
32348 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
32350 /* If we have a static chain, load it up. But, if the call was
32351 originally direct, the 3rd word has not been written since no
32352 trampoline has been built, so we ought not to load it, lest we
32353 override a static chain value. */
32354 if (!(GET_CODE (func_desc) == SYMBOL_REF
32355 && SYMBOL_REF_FUNCTION_P (func_desc))
32356 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
32357 && !chain_already_loaded (get_current_sequence ()->next->last))
32359 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
32360 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
32361 rtx func_sc_mem = gen_rtx_MEM (Pmode,
32362 gen_rtx_PLUS (Pmode, func,
32364 emit_move_insn (sc_reg, func_sc_mem);
32371 /* No TOC register needed for calls from PC-relative callers. */
32372 if (!rs6000_pcrel_p (cfun))
32373 /* Direct calls use the TOC: for local calls, the callee will
32374 assume the TOC register is set; for non-local calls, the
32375 PLT stub needs the TOC register. */
32380 /* Create the call. */
32381 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32382 if (value != NULL_RTX)
32383 call[0] = gen_rtx_SET (value, call[0]);
32387 call[n_call++] = toc_load;
32389 call[n_call++] = toc_restore;
32391 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
32393 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
32394 insn = emit_call_insn (insn);
32396 /* Mention all registers defined by the ABI to hold information
32397 as uses in CALL_INSN_FUNCTION_USAGE. */
32399 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
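/* Illustrative sketch of the code emitted above for an indirect call
   under the 64-bit AIX/ELFv1 ABI; r9 as the descriptor pointer and the
   40(1) TOC save slot are assumptions specific to this example:

	std 2,40(1)	# save our TOC in the reserved stack slot
	ld 0,0(9)	# word 0 of the descriptor: function address
	mtctr 0
	ld 11,16(9)	# word 2: static chain, when it must be loaded
	ld 2,8(9)	# word 1: callee's TOC, just before the call
	bctrl
	ld 2,40(1)	# restore our TOC after returning  */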
32402 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
32405 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32410 gcc_assert (INTVAL (cookie) == 0);
32413 tlsarg = global_tlsarg;
32415 /* Create the call. */
32416 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
32417 if (value != NULL_RTX)
32418 call[0] = gen_rtx_SET (value, call[0]);
32420 call[1] = simple_return_rtx;
32422 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
32423 insn = emit_call_insn (insn);
32425 /* Note use of the TOC register. */
32426 if (!rs6000_pcrel_p (cfun))
32427 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
32428 gen_rtx_REG (Pmode, TOC_REGNUM));
32431 /* Expand code to perform a call under the SYSV4 ABI. */
32434 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32436 rtx func = func_desc;
32440 rtx abi_reg = NULL_RTX;
32444 tlsarg = global_tlsarg;
32446 /* Handle longcall attributes. */
32447 if ((INTVAL (cookie) & CALL_LONG) != 0
32448 && GET_CODE (func_desc) == SYMBOL_REF)
32450 func = rs6000_longcall_ref (func_desc, tlsarg);
32451 /* If the longcall was implemented as an inline PLT call using
32452 PLT unspecs then func will be REG:r11. If not, func will be
32453 a pseudo reg. The inline PLT call sequence supports lazy
32454 linking (and longcalls to functions in dlopen'd libraries).
32455 The other style of longcall doesn't. The lazy linking entry
32456 to the dynamic symbol resolver requires r11 be the function
32457 address (as it is for linker generated PLT stubs). Ensure
32458 r11 stays valid to the bctrl by marking r11 used by the call. */
32463 /* Handle indirect calls. */
32464 if (GET_CODE (func) != SYMBOL_REF)
32466 func = force_reg (Pmode, func);
32468 /* Indirect calls via CTR are strongly preferred over indirect
32469 calls via LR, so move the address there. That can't be left
32470 to reload because we want to mark every instruction in an
32471 inline PLT call sequence with a reloc, enabling the linker to
32472 edit the sequence back to a direct call when that makes sense. */
32473 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32476 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
32477 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32478 emit_insn (gen_rtx_SET (func_addr, mark_func));
32479 v = gen_rtvec (2, func_addr, func_desc);
32480 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32483 emit_move_insn (func_addr, func);
32488 /* Create the call. */
32489 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32490 if (value != NULL_RTX)
32491 call[0] = gen_rtx_SET (value, call[0]);
32493 call[1] = gen_rtx_USE (VOIDmode, cookie);
32495 if (TARGET_SECURE_PLT
32497 && GET_CODE (func_addr) == SYMBOL_REF
32498 && !SYMBOL_REF_LOCAL_P (func_addr))
32499 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
32501 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
32503 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
32504 insn = emit_call_insn (insn);
32506 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
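/* Illustrative sketch (registers are arbitrary) of a plain -mlongcall
   indirect call produced through the path above on 32-bit SysV; the
   inline PLT variant instead wraps each insn in UNSPEC_PLTSEQ so the
   linker can edit the sequence back to a direct call:

	lis 9,func@ha
	addi 9,9,func@l
	mtctr 9
	bctrl  */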
32509 /* Expand code to perform a sibling call under the SysV4 ABI. */
32512 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32514 rtx func = func_desc;
32518 rtx abi_reg = NULL_RTX;
32521 tlsarg = global_tlsarg;
32523 /* Handle longcall attributes. */
32524 if ((INTVAL (cookie) & CALL_LONG) != 0
32525 && GET_CODE (func_desc) == SYMBOL_REF)
32527 func = rs6000_longcall_ref (func_desc, tlsarg);
32528 /* If the longcall was implemented as an inline PLT call using
32529 PLT unspecs then func will be REG:r11. If not, func will be
32530 a pseudo reg. The inline PLT call sequence supports lazy
32531 linking (and longcalls to functions in dlopen'd libraries).
32532 The other style of longcall doesn't. The lazy linking entry
32533 to the dynamic symbol resolver requires r11 be the function
32534 address (as it is for linker generated PLT stubs). Ensure
32535 r11 stays valid to the bctr by marking r11 used by the call. */
32540 /* Handle indirect calls. */
32541 if (GET_CODE (func) != SYMBOL_REF)
32543 func = force_reg (Pmode, func);
32545 /* Indirect sibcalls must go via CTR. That can't be left to
32546 reload because we want to mark every instruction in an inline
32547 PLT call sequence with a reloc, enabling the linker to edit
32548 the sequence back to a direct call when that makes sense. */
32549 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32552 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
32553 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32554 emit_insn (gen_rtx_SET (func_addr, mark_func));
32555 v = gen_rtvec (2, func_addr, func_desc);
32556 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32559 emit_move_insn (func_addr, func);
32564 /* Create the call. */
32565 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32566 if (value != NULL_RTX)
32567 call[0] = gen_rtx_SET (value, call[0]);
32569 call[1] = gen_rtx_USE (VOIDmode, cookie);
32570 call[2] = simple_return_rtx;
32572 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
32573 insn = emit_call_insn (insn);
32575 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32580 /* Expand code to perform a call under the Darwin ABI.
32581 Modulo handling of mlongcall, this is much the same as sysv.
32582 If/when the longcall optimisation is removed, we could drop this
32583 code and use the sysv case (taking care to avoid the tls stuff).
32585 We can use this for sibcalls too, if needed. */
32588 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
32589 rtx cookie, bool sibcall)
32591 rtx func = func_desc;
32595 int cookie_val = INTVAL (cookie);
32596 bool make_island = false;
32598 /* Handle longcall attributes; there are two cases for Darwin:
32599 1) Newer linkers are capable of synthesising any branch islands needed.
32600 2) We need a helper branch island synthesised by the compiler.
32601 The second case has mostly been retired and we don't use it for m64.
32602 In fact, it is only an optimisation; we could just indirect as sysv
32603 does, but we keep the island for backwards compatibility for now.
32604 If we're going to use this, then we need to keep the CALL_LONG bit set,
32605 so that we can pick up the special insn form later. */
32606 if ((cookie_val & CALL_LONG) != 0
32607 && GET_CODE (func_desc) == SYMBOL_REF)
32609 /* FIXME: the longcall opt should not hang off picsymbol stubs. */
32610 if (darwin_picsymbol_stubs && TARGET_32BIT)
32611 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
32614 /* The linker is capable of doing this, but the user explicitly
32615 asked for -mlongcall, so we'll do the 'normal' version. */
32616 func = rs6000_longcall_ref (func_desc, NULL_RTX);
32617 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
32621 /* Handle indirect calls. */
32622 if (GET_CODE (func) != SYMBOL_REF)
32624 func = force_reg (Pmode, func);
32626 /* Indirect calls via CTR are strongly preferred over indirect
32627 calls via LR, and are required for indirect sibcalls, so move
32628 the address there. */
32629 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32630 emit_move_insn (func_addr, func);
32635 /* Create the call. */
32636 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32637 if (value != NULL_RTX)
32638 call[0] = gen_rtx_SET (value, call[0]);
32640 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
32643 call[2] = simple_return_rtx;
32645 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
32647 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
32648 insn = emit_call_insn (insn);
32649 /* Now we have the debug info in the insn, we can set up the branch island
32650 if we're using one. */
32653 tree funname = get_identifier (XSTR (func_desc, 0));
32655 if (no_previous_def (funname))
32657 rtx label_rtx = gen_label_rtx ();
32658 char *label_buf, temp_buf[256];
32659 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
32660 CODE_LABEL_NUMBER (label_rtx));
32661 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
32662 tree labelname = get_identifier (label_buf);
32663 add_compiler_branch_island (labelname, funname,
32664 insn_line ((const rtx_insn*)insn));
32671 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
32672 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
32675 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
32683 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
32684 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
32687 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
32693 /* Return whether we should generate PC-relative code for FNDECL. */
32695 rs6000_fndecl_pcrel_p (const_tree fndecl)
32697 if (DEFAULT_ABI != ABI_ELFv2)
32700 struct cl_target_option *opts = target_opts_for_fn (fndecl);
32702 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
32703 && TARGET_CMODEL == CMODEL_MEDIUM);
32706 /* Return whether we should generate PC-relative code for *FN. */
32708 rs6000_pcrel_p (struct function *fn)
32710 if (DEFAULT_ABI != ABI_ELFv2)
32713 /* Optimize usual case. */
32715 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
32716 && TARGET_CMODEL == CMODEL_MEDIUM);
32718 return rs6000_fndecl_pcrel_p (fn->decl);
32721 #ifdef HAVE_GAS_HIDDEN
32722 # define USE_HIDDEN_LINKONCE 1
32724 # define USE_HIDDEN_LINKONCE 0
32727 /* Fills in the label name that should be used for a 476 link stack thunk. */
32730 get_ppc476_thunk_name (char name[32])
32732 gcc_assert (TARGET_LINK_STACK);
32734 if (USE_HIDDEN_LINKONCE)
32735 sprintf (name, "__ppc476.get_thunk");
32737 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
32740 /* This function emits the simple thunk routine that is used to preserve
32741 the link stack on the 476 cpu. */
32743 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
32745 rs6000_code_end (void)
32750 if (!TARGET_LINK_STACK)
32753 get_ppc476_thunk_name (name);
32755 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
32756 build_function_type_list (void_type_node, NULL_TREE));
32757 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
32758 NULL_TREE, void_type_node);
32759 TREE_PUBLIC (decl) = 1;
32760 TREE_STATIC (decl) = 1;
32763 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
32765 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
32766 targetm.asm_out.unique_section (decl, 0);
32767 switch_to_section (get_named_section (decl, NULL, 0));
32768 DECL_WEAK (decl) = 1;
32769 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
32770 targetm.asm_out.globalize_label (asm_out_file, name);
32771 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
32772 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
32777 switch_to_section (text_section);
32778 ASM_OUTPUT_LABEL (asm_out_file, name);
32781 DECL_INITIAL (decl) = make_node (BLOCK);
32782 current_function_decl = decl;
32783 allocate_struct_function (decl, false);
32784 init_function_start (decl);
32785 first_function_block_is_cold = false;
32786 /* Make sure unwind info is emitted for the thunk if needed. */
32787 final_start_function (emit_barrier (), asm_out_file, 1);
32789 fputs ("\tblr\n", asm_out_file);
32791 final_end_function ();
32792 init_insn_lengths ();
32793 free_after_compilation (cfun);
32795 current_function_decl = NULL;
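/* Illustrative note (an assumption about the thunk's use, not original
   text): the routine emitted above is just

       __ppc476.get_thunk:
	       blr

   and PIC code materialises its own address with
   "bl __ppc476.get_thunk" followed by mflr; the matched bl/blr pair
   keeps the 476's branch link stack balanced, unlike the usual
   "bcl 20,31,$+4" trick.  */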
32798 /* Add r30 to hard reg set if the prologue sets it up and it is not
32799 pic_offset_table_rtx. */
32802 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
32804 if (!TARGET_SINGLE_PIC_BASE
32806 && TARGET_MINIMAL_TOC
32807 && !constant_pool_empty_p ())
32808 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
32809 if (cfun->machine->split_stack_argp_used)
32810 add_to_hard_reg_set (&set->set, Pmode, 12);
32812 /* Make sure the hard reg set doesn't include r2, which was possibly added
32813 via PIC_OFFSET_TABLE_REGNUM. */
32815 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
32819 /* Helper function for rs6000_split_logical to emit a logical instruction after
32820 splitting the operation into single GPR registers.
32822 DEST is the destination register.
32823 OP1 and OP2 are the input source registers.
32824 CODE is the base operation (AND, IOR, XOR, NOT).
32825 MODE is the machine mode.
32826 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32827 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32828 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
32831 rs6000_split_logical_inner (rtx dest,
32834 enum rtx_code code,
32836 bool complement_final_p,
32837 bool complement_op1_p,
32838 bool complement_op2_p)
32842 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
32843 if (op2 && CONST_INT_P (op2)
32844 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
32845 && !complement_final_p && !complement_op1_p && !complement_op2_p)
32847 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
32848 HOST_WIDE_INT value = INTVAL (op2) & mask;
32850 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
32855 emit_insn (gen_rtx_SET (dest, const0_rtx));
32859 else if (value == mask)
32861 if (!rtx_equal_p (dest, op1))
32862 emit_insn (gen_rtx_SET (dest, op1));
32867 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
32868 into separate ORI/ORIS or XORI/XORIS instructions. */
32869 else if (code == IOR || code == XOR)
32873 if (!rtx_equal_p (dest, op1))
32874 emit_insn (gen_rtx_SET (dest, op1));
32880 if (code == AND && mode == SImode
32881 && !complement_final_p && !complement_op1_p && !complement_op2_p)
32883 emit_insn (gen_andsi3 (dest, op1, op2));
32887 if (complement_op1_p)
32888 op1 = gen_rtx_NOT (mode, op1);
32890 if (complement_op2_p)
32891 op2 = gen_rtx_NOT (mode, op2);
32893 /* For canonical RTL, if only one arm is inverted it is the first. */
32894 if (!complement_op1_p && complement_op2_p)
32895 std::swap (op1, op2);
32897 bool_rtx = ((code == NOT)
32898 ? gen_rtx_NOT (mode, op1)
32899 : gen_rtx_fmt_ee (code, mode, op1, op2));
32901 if (complement_final_p)
32902 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
32904 emit_insn (gen_rtx_SET (dest, bool_rtx));
32907 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
32908 operations are split immediately during RTL generation to allow for more
32909 optimizations of the AND/IOR/XOR.
32911 OPERANDS is an array containing the destination and two input operands.
32912 CODE is the base operation (AND, IOR, XOR, NOT).
32913 MODE is the machine mode.
32914 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32915 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32916 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
32917 CLOBBER_REG is either NULL or a scratch register of type CC to allow
32918 formation of the AND instructions. */
32921 rs6000_split_logical_di (rtx operands[3],
32922 enum rtx_code code,
32923 bool complement_final_p,
32924 bool complement_op1_p,
32925 bool complement_op2_p)
32927 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
32928 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
32929 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
32930 enum hi_lo { hi = 0, lo = 1 };
32931 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
32934 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
32935 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
32936 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
32937 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
32940 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
32943 if (!CONST_INT_P (operands[2]))
32945 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
32946 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
32950 HOST_WIDE_INT value = INTVAL (operands[2]);
32951 HOST_WIDE_INT value_hi_lo[2];
32953 gcc_assert (!complement_final_p);
32954 gcc_assert (!complement_op1_p);
32955 gcc_assert (!complement_op2_p);
32957 value_hi_lo[hi] = value >> 32;
32958 value_hi_lo[lo] = value & lower_32bits;
32960 for (i = 0; i < 2; i++)
32962 HOST_WIDE_INT sub_value = value_hi_lo[i];
32964 if (sub_value & sign_bit)
32965 sub_value |= upper_32bits;
32967 op2_hi_lo[i] = GEN_INT (sub_value);
32969 /* If this is an AND instruction, check to see if we need to load
32970 the value in a register. */
32971 if (code == AND && sub_value != -1 && sub_value != 0
32972 && !and_operand (op2_hi_lo[i], SImode))
32973 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
32978 for (i = 0; i < 2; i++)
32980 /* Split large IOR/XOR operations. */
32981 if ((code == IOR || code == XOR)
32982 && CONST_INT_P (op2_hi_lo[i])
32983 && !complement_final_p
32984 && !complement_op1_p
32985 && !complement_op2_p
32986 && !logical_const_operand (op2_hi_lo[i], SImode))
32988 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
32989 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
32990 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
32991 rtx tmp = gen_reg_rtx (SImode);
32993 /* Make sure the constant is sign extended. */
32994 if ((hi_16bits & sign_bit) != 0)
32995 hi_16bits |= upper_32bits;
32997 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
32998 code, SImode, false, false, false);
33000 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
33001 code, SImode, false, false, false);
33004 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
33005 code, SImode, complement_final_p,
33006 complement_op1_p, complement_op2_p);
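/* Worked example (illustrative, not from the original sources): on a
   32-bit big-endian target,

       (set (reg:DI 3) (xor:DI (reg:DI 3) (const_int 0x12345678)))

   is split into SImode halves; the high half XORs with 0 and becomes a
   no-op, while the low half's constant needs two logical immediates:

	xoris 4,4,0x1234
	xori 4,4,0x5678  */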
33012 /* Split the insns that make up boolean operations operating on multiple GPR
33013 registers. The boolean MD patterns ensure that the inputs either are
33014 exactly the same as the output registers, or there is no overlap.
33016 OPERANDS is an array containing the destination and two input operands.
33017 CODE is the base operation (AND, IOR, XOR, NOT).
33018 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33019 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33020 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33023 rs6000_split_logical (rtx operands[3],
33024 enum rtx_code code,
33025 bool complement_final_p,
33026 bool complement_op1_p,
33027 bool complement_op2_p)
33029 machine_mode mode = GET_MODE (operands[0]);
33030 machine_mode sub_mode;
33032 int sub_size, regno0, regno1, nregs, i;
33034 /* If this is DImode, use the specialized version that can run before
33035 register allocation. */
33036 if (mode == DImode && !TARGET_POWERPC64)
33038 rs6000_split_logical_di (operands, code, complement_final_p,
33039 complement_op1_p, complement_op2_p);
33045 op2 = (code == NOT) ? NULL_RTX : operands[2];
33046 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
33047 sub_size = GET_MODE_SIZE (sub_mode);
33048 regno0 = REGNO (op0);
33049 regno1 = REGNO (op1);
33051 gcc_assert (reload_completed);
33052 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33053 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33055 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
33056 gcc_assert (nregs > 1);
33058 if (op2 && REG_P (op2))
33059 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
33061 for (i = 0; i < nregs; i++)
33063 int offset = i * sub_size;
33064 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
33065 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
33066 rtx sub_op2 = ((code == NOT)
33068 : simplify_subreg (sub_mode, op2, mode, offset));
33070 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
33071 complement_final_p, complement_op1_p,
33079 /* Return true if the peephole2 can combine a load involving a combination of
33080 an addis instruction and a load with an offset that can be fused together on a power8. */
33084 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
33085 rtx addis_value, /* addis value. */
33086 rtx target, /* target register that is loaded. */
33087 rtx mem) /* bottom part of the memory addr. */
33092 /* Validate arguments. */
33093 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
33096 if (!base_reg_operand (target, GET_MODE (target)))
33099 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33102 /* Allow sign/zero extension. */
33103 if (GET_CODE (mem) == ZERO_EXTEND
33104 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33105 mem = XEXP (mem, 0);
33110 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33113 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33114 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33117 /* Validate that the register used to load the high value is either the
33118 register being loaded, or we can safely replace its use.
33120 This function is only called from the peephole2 pass and we assume that
33121 there are 2 instructions in the peephole (addis and load), so we want to
33122 check if the target register was not used in the memory address and the
33123 register to hold the addis result is dead after the peephole. */
33124 if (REGNO (addis_reg) != REGNO (target))
33126 if (reg_mentioned_p (target, mem))
33129 if (!peep2_reg_dead_p (2, addis_reg))
33132 /* If the target register being loaded is the stack pointer, we must
33133 avoid loading any other value into it, even temporarily. */
33134 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33138 base_reg = XEXP (addr, 0);
33139 return REGNO (addis_reg) == REGNO (base_reg);
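/* Shape of the two-insn peephole being validated, as a sketch with
   illustrative registers and symbol:

	addis 9,2,sym@toc@ha	# sets addis_reg from addis_value
	lwz 9,sym@toc@l(9)	# loads the target register

   When the addis register differs from the loaded register, the
   reg_mentioned_p and peep2_reg_dead_p checks above make sure the
   addis result can be rewritten into the target register safely.  */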
33142 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33143 sequence. We adjust the addis register to use the target register. If the
33144 load sign extends, we adjust the code to do the zero extending load, and an
33145 explicit sign extension later since the fusion only covers zero extending loads. The operands are:
33149 operands[0] register set with addis (to be replaced with target)
33150 operands[1] value set via addis
33151 operands[2] target register being loaded
33152 operands[3] D-form memory reference using operands[0]. */
33155 expand_fusion_gpr_load (rtx *operands)
33157 rtx addis_value = operands[1];
33158 rtx target = operands[2];
33159 rtx orig_mem = operands[3];
33160 rtx new_addr, new_mem, orig_addr, offset;
33161 enum rtx_code plus_or_lo_sum;
33162 machine_mode target_mode = GET_MODE (target);
33163 machine_mode extend_mode = target_mode;
33164 machine_mode ptr_mode = Pmode;
33165 enum rtx_code extend = UNKNOWN;
33167 if (GET_CODE (orig_mem) == ZERO_EXTEND
33168 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
33170 extend = GET_CODE (orig_mem);
33171 orig_mem = XEXP (orig_mem, 0);
33172 target_mode = GET_MODE (orig_mem);
33175 gcc_assert (MEM_P (orig_mem));
33177 orig_addr = XEXP (orig_mem, 0);
33178 plus_or_lo_sum = GET_CODE (orig_addr);
33179 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
33181 offset = XEXP (orig_addr, 1);
33182 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
33183 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
33185 if (extend != UNKNOWN)
33186 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
33188 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
33189 UNSPEC_FUSION_GPR);
33190 emit_insn (gen_rtx_SET (target, new_mem));
33192 if (extend == SIGN_EXTEND)
33194 int sub_off = ((BYTES_BIG_ENDIAN)
33195 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
33198 = simplify_subreg (target_mode, target, extend_mode, sub_off);
33200 emit_insn (gen_rtx_SET (target,
33201 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
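/* Editorial sketch, not part of the original source: for a sign-extending
   HImode load into a DImode target, the expansion above is conceptually

	(set (reg:DI 9) (unspec:DI [(zero_extend:DI (mem:HI new_addr))]
				   UNSPEC_FUSION_GPR))
	(set (reg:DI 9) (sign_extend:DI (subreg:HI (reg:DI 9) ...)))

   where new_addr is the original address rewritten to use the addis value,
   since power8 fusion only handles zero-extending loads.  */
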
/* Emit the addis instruction that will be part of a fused instruction
   sequence.  */

void
emit_fusion_addis (rtx target, rtx addis_value)
{
  rtx fuse_ops[10];
  const char *addis_str = NULL;

  /* Emit the addis instruction.  */
  fuse_ops[0] = target;
  if (satisfies_constraint_L (addis_value))
    {
      fuse_ops[1] = addis_value;
      addis_str = "lis %0,%v1";
    }

  else if (GET_CODE (addis_value) == PLUS)
    {
      rtx op0 = XEXP (addis_value, 0);
      rtx op1 = XEXP (addis_value, 1);

      if (REG_P (op0) && CONST_INT_P (op1)
	  && satisfies_constraint_L (op1))
	{
	  fuse_ops[1] = op0;
	  fuse_ops[2] = op1;
	  addis_str = "addis %0,%1,%v2";
	}
    }

  else if (GET_CODE (addis_value) == HIGH)
    {
      rtx value = XEXP (addis_value, 0);
      if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
	{
	  fuse_ops[1] = XVECEXP (value, 0, 0);		/* symbol ref.  */
	  fuse_ops[2] = XVECEXP (value, 0, 1);		/* TOC register.  */
	  if (TARGET_ELF)
	    addis_str = "addis %0,%2,%1@toc@ha";

	  else if (TARGET_XCOFF)
	    addis_str = "addis %0,%1@u(%2)";

	  else
	    gcc_unreachable ();
	}

      else if (GET_CODE (value) == PLUS)
	{
	  rtx op0 = XEXP (value, 0);
	  rtx op1 = XEXP (value, 1);

	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_TOCREL
	      && CONST_INT_P (op1))
	    {
	      fuse_ops[1] = XVECEXP (op0, 0, 0);	/* symbol ref.  */
	      fuse_ops[2] = XVECEXP (op0, 0, 1);	/* TOC register.  */
	      fuse_ops[3] = op1;
	      if (TARGET_ELF)
		addis_str = "addis %0,%2,%1+%3@toc@ha";

	      else if (TARGET_XCOFF)
		addis_str = "addis %0,%1+%3@u(%2)";

	      else
		gcc_unreachable ();
	    }
	}

      else if (satisfies_constraint_L (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%v1";
	}

      else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%1@ha";
	}
    }

  if (!addis_str)
    fatal_insn ("Could not generate addis value for fusion", addis_value);

  output_asm_insn (addis_str, fuse_ops);
}

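/* Editorial sketch, not part of the original source: depending on the form of
   ADDIS_VALUE, the templates above produce output along the lines of

	lis 9,0x1234		constant high half
	addis 9,3,0x12		register plus high constant
	addis 9,2,sym@toc@ha	ELF TOC-relative high part
	addis 9,sym@u(2)	XCOFF TOC-relative high part  */
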
/* Emit a D-form load or store instruction that is the second instruction
   of a fusion sequence.  */

static void
emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
{
  rtx fuse_ops[10];
  char insn_template[80];

  fuse_ops[0] = load_reg;
  fuse_ops[1] = addis_reg;

  if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
    {
      sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == UNSPEC
	   && XINT (offset, 1) == UNSPEC_TOCREL)
    {
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (offset, 0, 0);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == PLUS
	   && GET_CODE (XEXP (offset, 0)) == UNSPEC
	   && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
	   && CONST_INT_P (XEXP (offset, 1)))
    {
      rtx tocrel_unspec = XEXP (offset, 0);
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
      fuse_ops[3] = XEXP (offset, 1);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
    {
      sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else
    fatal_insn ("Unable to generate load/store offset for fusion", offset);
}

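/* Editorial sketch, not part of the original source: with an INSN_STR of
   "lwz" the templates above expand to, for example,

	lwz 9,8(10)		constant offset
	lwz 9,sym@toc@l(10)	ELF TOC-relative low part
	lwz 9,sym@l(10)		XCOFF low part / 32-bit ELF constant

   i.e. the D-form half that pairs with the addis emitted by
   emit_fusion_addis.  */
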
/* Given an address, convert it into the addis and load offset parts.  Addresses
   created during the peephole2 process look like:
	(lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
		(unspec [(...)] UNSPEC_TOCREL))  */

static void
fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
{
  rtx hi, lo;

  if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
    {
      hi = XEXP (addr, 0);
      lo = XEXP (addr, 1);
    }
  else
    gcc_unreachable ();

  *p_hi = hi;
  *p_lo = lo;
}

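/* Editorial note, not part of the original source: for the LO_SUM address
   shown in the comment above, *p_hi receives the HIGH part that feeds
   emit_fusion_addis, and *p_lo the low part that becomes the D-form offset
   in emit_fusion_load.  */
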
/* Return a string to fuse an addis instruction with a gpr load to the same
   register that we loaded up the addis instruction.  The address that is used
   is the logical address that was formed during peephole2:
	(lo_sum (high) (low-part))

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_gpr_load (rtx target, rtx mem)
{
  rtx addis_value;
  rtx addr;
  rtx load_offset;
  const char *load_str = NULL;
  machine_mode mode;

  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  gcc_assert (REG_P (target) && MEM_P (mem));

  addr = XEXP (mem, 0);
  fusion_split_address (addr, &addis_value, &load_offset);

  /* Now emit the load instruction to the same register.  */
  mode = GET_MODE (mem);
  switch (mode)
    {
    case E_QImode:
      load_str = "lbz";
      break;

    case E_HImode:
      load_str = "lhz";
      break;

    case E_SImode:
    case E_SFmode:
      load_str = "lwz";
      break;

    case E_DImode:
    case E_DFmode:
      gcc_assert (TARGET_POWERPC64);
      load_str = "ld";
      break;

    default:
      fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
    }

  /* Emit the addis instruction.  */
  emit_fusion_addis (target, addis_value);

  /* Emit the D-form load instruction.  */
  emit_fusion_load (target, target, load_offset, load_str);

  return "";
}

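/* Editorial sketch, not part of the original source: for an SImode
   TOC-relative load, the two calls above emit the fused pair

	addis 9,2,sym@toc@ha
	lwz 9,sym@toc@l(9)

   with the opcode selected by mode: lbz (QImode), lhz (HImode),
   lwz (SImode/SFmode), or ld (DImode/DFmode, 64-bit only).  */
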
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear,
			    call_mffs);

  tree fenv_clear_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node,
				fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clear_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var)

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
				| (*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

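/* Editorial sketch, not part of the original source: the middle end splices
   the HOLD/CLEAR/UPDATE trees built above around a C11 atomic floating-point
   compound assignment, conceptually

	feholdexcept (&fenv);			// *hold
	do
	  {
	    feclearexcept (FE_ALL_EXCEPT);	// *clear, on each retry
	    ...load, compute, compare-and-swap...
	  }
	while (!success);
	feupdateenv (&fenv);			// *update

   so that only the exceptions raised by the successfully stored result
   survive.  */
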
void
rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
     vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
					   GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
					   GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_vsx_xvcdpsp (rtx_tmp2, rtx_tmp0));
  emit_insn (gen_vsx_xvcdpsp (rtx_tmp3, rtx_tmp1));

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

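/* Editorial note, not part of the original source: the net effect of the
   sequence above is dst = { (float) src1[0], (float) src1[1],
   (float) src2[0], (float) src2[1] }; the xxpermdi selectors and the vmrgew
   operand order are swapped between endiannesses so the element order comes
   out the same either way.  */
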
void
rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DImode);
  rtx_tmp1 = gen_reg_rtx (V2DImode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
     vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
    }

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

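/* Editorial note, not part of the original source: this is the V2DI analogue
   of rs6000_generate_float2_double_code; xvcvsxdsp/xvcvuxdsp convert the
   signed or unsigned doublewords, so dst is again the four conversions of
   src1[0], src1[1], src2[0], src2[1] in order.  */
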
void
rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
			       rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));

  rtx_tmp2 = gen_reg_rtx (V4SImode);
  rtx_tmp3 = gen_reg_rtx (V4SImode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
    }

  emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
}

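/* Editorial note, not part of the original source: this expands the
   vec_signed2/vec_unsigned2 style builtins, converting the four doubles from
   the two V2DF inputs to 32-bit integers (xvcvdpsxws or xvcvdpuxws) and
   merging the even-word results into one V4SI with vmrgew.  */
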
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && (STRICT_ALIGNMENT || !optimize_size))
    return MAX (align, BITS_PER_WORD);
  return align;
}

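/* Editorial note, not part of the original source: this keeps the historic
   rs6000 behavior of padding string constants out to a word boundary
   (BITS_PER_WORD is 32 on -m32 and 64 on -m64), except when optimizing for
   size on a non-strict-alignment target.  */
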
/* Implement TARGET_STARTING_FRAME_OFFSET.  */

static HOST_WIDE_INT
rs6000_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return RS6000_STARTING_FRAME_OFFSET;
}

/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128).  This is called
   via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME.  */

#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE *stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
    {
      tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
      const char *old_name;

      ieee128_mangling_gcc_8_1 = true;
      lang_hooks.set_decl_assembler_name (decl);
      old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
      ieee128_mangling_gcc_8_1 = false;

      if (strcmp (name, old_name) != 0)
	{
	  fprintf (stream, "\t.weak %s\n", old_name);
	  fprintf (stream, "\t.set %s,%s\n", old_name, name);
	}
    }
}
#endif

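/* Editorial sketch, not part of the original source: for a C++ function
   "void f (__float128)" currently mangled as _Z1fu9__ieee128, the code above
   would emit the GCC 8.1 compatibility alias

	.weak _Z1fU10__float128
	.set _Z1fU10__float128,_Z1fu9__ieee128  */
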
/* On 64-bit Linux and Freebsd systems, possibly switch the long double library
   function names from <foo>l to <foo>f128 if the default long double type is
   IEEE 128-bit.  Typically, with the C and C++ languages, the standard math.h
   include file switches the names on systems that support long double as IEEE
   128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
   In the future, glibc will export names like __ieee128_sinf128 and we can
   switch to using those instead of using sinf128, which pollutes the user's
   namespace.

   This will switch the names for Fortran math functions as well (Fortran does
   not use math.h).  However, Fortran needs other changes to the compiler and
   library before you can switch the real*16 type at compile time.

   We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name.  We
   only do this if the default is that long double is IBM extended double, and
   the user asked for IEEE 128-bit.  */

static tree
rs6000_mangle_decl_assembler_name (tree decl, tree id)
{
  if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
      && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl))
    {
      size_t len = IDENTIFIER_LENGTH (id);
      const char *name = IDENTIFIER_POINTER (id);

      if (name[len - 1] == 'l')
	{
	  bool uses_ieee128_p = false;
	  tree type = TREE_TYPE (decl);
	  machine_mode ret_mode = TYPE_MODE (type);

	  /* See if the function returns an IEEE 128-bit floating point type or
	     complex type.  */
	  if (ret_mode == TFmode || ret_mode == TCmode)
	    uses_ieee128_p = true;
	  else
	    {
	      function_args_iterator args_iter;
	      tree arg;

	      /* See if the function passes an IEEE 128-bit floating point type
		 or complex type.  */
	      FOREACH_FUNCTION_ARGS (type, arg, args_iter)
		{
		  machine_mode arg_mode = TYPE_MODE (arg);
		  if (arg_mode == TFmode || arg_mode == TCmode)
		    {
		      uses_ieee128_p = true;
		      break;
		    }
		}
	    }

	  /* If we passed or returned an IEEE 128-bit floating point type,
	     change the name.  */
	  if (uses_ieee128_p)
	    {
	      char *name2 = (char *) alloca (len + 4);
	      memcpy (name2, name, len - 1);
	      strcpy (name2 + len - 1, "f128");
	      id = get_identifier (name2);
	    }
	}
    }

  return id;
}

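/* Editorial example, not part of the original source: with -mlong-double-128
   -mabi=ieeelongdouble on a target whose default long double is IBM extended
   double, a call to __builtin_sinl (TFmode argument and return value) is
   renamed from "sinl" to "sinf128" by the hook above.  */
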
/* Predict whether the given loop in gimple will be transformed in the RTL
   doloop_optimize pass.  */

static bool
rs6000_predict_doloop_p (struct loop *loop)
{
  gcc_assert (loop);

  /* On rs6000, targetm.can_use_doloop_p is actually
     can_use_doloop_if_innermost.  Just ensure the loop is innermost.  */
  if (loop->inner != NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predict doloop failure due to"
			    " loop nesting.\n");
      return false;
    }

  return true;
}

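/* Editorial example, not part of the original source: in a doubly nested
   loop, the outer loop has loop->inner set, so the hook above predicts that
   no counter-based (bdnz) loop will be generated for it, while the innermost
   loop is predicted to be transformed.  */
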
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"