/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2019 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
static bool rs6000_passes_ieee128;
#endif

/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

extern GTY(()) section *toc_section;
section *toc_section = 0;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV = 0x001,    /* Use divide estimate.  */
  RECIP_DF_DIV = 0x002,
  RECIP_V4SF_DIV = 0x004,
  RECIP_V2DF_DIV = 0x008,

  RECIP_SF_RSQRT = 0x010,  /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE = 0,
  RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
	       | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
	       | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;  /* option name.  */
  unsigned int mask;   /* mask bits to set.  */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
	       | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
	       | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
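
/* As an illustration (a sketch derived from the table above, not an
   exhaustive list): a command line such as -mrecip=divf,rsqrtd would look
   up "divf" and "rsqrtd" above and OR together
   RECIP_SF_DIV | RECIP_V4SF_DIV | RECIP_DF_RSQRT | RECIP_V2DF_RSQRT,
   enabling the estimate sequences only for those scalar/vector modes.  */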

/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",      PPC_PLATFORM_POWER9 },
  { "power8",      PPC_PLATFORM_POWER8 },
  { "power7",      PPC_PLATFORM_POWER7 },
  { "power6x",     PPC_PLATFORM_POWER6X },
  { "power6",      PPC_PLATFORM_POWER6 },
  { "power5+",     PPC_PLATFORM_POWER5_PLUS },
  { "power5",      PPC_PLATFORM_POWER5 },
  { "ppc970",      PPC_PLATFORM_PPC970 },
  { "power4",      PPC_PLATFORM_POWER4 },
  { "ppca2",       PPC_PLATFORM_PPCA2 },
  { "ppc476",      PPC_PLATFORM_PPC476 },
  { "ppc464",      PPC_PLATFORM_PPC464 },
  { "ppc440",      PPC_PLATFORM_PPC440 },
  { "ppc405",      PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
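
/* Illustrative user-level use of the table above (see the GCC documentation
   for __builtin_cpu_is; do_power9_work and do_generic_work are hypothetical
   user functions):

     if (__builtin_cpu_is ("power9"))
       do_power9_work ();
     else
       do_generic_work ();

   The builtin compares the string against the AT_PLATFORM value that newer
   LIBCs store in the TCB.  */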

/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",          PPC_FEATURE_HAS_4xxMAC,       0 },
  { "altivec",         PPC_FEATURE_HAS_ALTIVEC,      0 },
  { "arch_2_05",       PPC_FEATURE_ARCH_2_05,        0 },
  { "arch_2_06",       PPC_FEATURE_ARCH_2_06,        0 },
  { "archpmu",         PPC_FEATURE_PERFMON_COMPAT,   0 },
  { "booke",           PPC_FEATURE_BOOKE,            0 },
  { "cellbe",          PPC_FEATURE_CELL_BE,          0 },
  { "dfp",             PPC_FEATURE_HAS_DFP,          0 },
  { "efpdouble",       PPC_FEATURE_HAS_EFP_DOUBLE,   0 },
  { "efpsingle",       PPC_FEATURE_HAS_EFP_SINGLE,   0 },
  { "fpu",             PPC_FEATURE_HAS_FPU,          0 },
  { "ic_snoop",        PPC_FEATURE_ICACHE_SNOOP,     0 },
  { "mmu",             PPC_FEATURE_HAS_MMU,          0 },
  { "notb",            PPC_FEATURE_NO_TB,            0 },
  { "pa6t",            PPC_FEATURE_PA6T,             0 },
  { "power4",          PPC_FEATURE_POWER4,           0 },
  { "power5",          PPC_FEATURE_POWER5,           0 },
  { "power5+",         PPC_FEATURE_POWER5_PLUS,      0 },
  { "power6x",         PPC_FEATURE_POWER6_EXT,       0 },
  { "ppc32",           PPC_FEATURE_32,               0 },
  { "ppc601",          PPC_FEATURE_601_INSTR,        0 },
  { "ppc64",           PPC_FEATURE_64,               0 },
  { "ppcle",           PPC_FEATURE_PPC_LE,           0 },
  { "smt",             PPC_FEATURE_SMT,              0 },
  { "spe",             PPC_FEATURE_HAS_SPE,          0 },
  { "true_le",         PPC_FEATURE_TRUE_LE,          0 },
  { "ucache",          PPC_FEATURE_UNIFIED_CACHE,    0 },
  { "vsx",             PPC_FEATURE_HAS_VSX,          0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",       PPC_FEATURE2_ARCH_2_07,       1 },
  { "dscr",            PPC_FEATURE2_HAS_DSCR,        1 },
  { "ebb",             PPC_FEATURE2_HAS_EBB,         1 },
  { "htm",             PPC_FEATURE2_HAS_HTM,         1 },
  { "htm-nosc",        PPC_FEATURE2_HTM_NOSC,        1 },
  { "htm-no-suspend",  PPC_FEATURE2_HTM_NO_SUSPEND,  1 },
  { "isel",            PPC_FEATURE2_HAS_ISEL,        1 },
  { "tar",             PPC_FEATURE2_HAS_TAR,         1 },
  { "vcrypto",         PPC_FEATURE2_HAS_VEC_CRYPTO,  1 },
  { "arch_3_00",       PPC_FEATURE2_ARCH_3_00,       1 },
  { "ieee128",         PPC_FEATURE2_HAS_IEEE128,     1 },
  { "darn",            PPC_FEATURE2_DARN,            1 },
  { "scv",             PPC_FEATURE2_SCV,             1 }
};
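
/* Similarly, __builtin_cpu_supports tests a single AT_HWCAP/AT_HWCAP2 bit;
   the accepted strings are exactly the hwcap names in the table above.  A
   sketch of typical user code (vsx_loop and scalar_loop are hypothetical
   user functions):

     if (__builtin_cpu_supports ("vsx"))
       vsx_loop ();
     else
       scalar_loop ();  */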

/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,  /* default clone.  */
  CLONE_ISA_2_05,     /* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,     /* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,     /* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,     /* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;  /* rs6000_isa mask.  */
  const char *name;        /* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,                     "" },           /* Default options.  */
  { OPTION_MASK_CMPB,      "arch_2_05" },  /* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,   "arch_2_06" },  /* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR, "arch_2_07" },  /* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR, "arch_3_00" },  /* ISA 3.00 (power9).  */
};
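
/* These entries drive the PowerPC variant of the target_clones attribute.
   A sketch of the documented user syntax (mod_func is a hypothetical user
   function; see the GCC documentation for target_clones):

     __attribute__ ((target_clones ("cpu=power9,default")))
     long mod_func (long a, long b) { return a % b; }

   The ifunc resolver emitted for the clones uses __builtin_cpu_supports on
   the HWCAP names above to pick the best clone at load time.  */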


/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
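
/* Consequences of the enum ordering above, for illustration:
   IS_STD_REG_TYPE (ALTIVEC_REG_TYPE) and IS_FP_VECT_REG_TYPE (VSX_REG_TYPE)
   are true, while IS_STD_REG_TYPE (SPR_REG_TYPE) and
   IS_FP_VECT_REG_TYPE (GPR_REG_TYPE) are false.  */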


/* Register classes we care about in secondary reload or when legitimizing
   addresses.  We only need to worry about GPR, FPR, and Altivec registers
   here, along with an ANY field that is the OR of the 3 register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,  /* General purpose registers.  */
  RELOAD_REG_FPR,  /* Traditional floating point regs.  */
  RELOAD_REG_VMX,  /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,  /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS  RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;  /* Register class name.  */
  int reg;           /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },      /* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },      /* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },  /* RELOAD_REG_VMX.  */
  { "Any", -1 },                   /* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID       0x01  /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE    0x02  /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED     0x04  /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET      0x08  /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC  0x10  /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY  0x20  /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16     0x40  /* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET 0x80  /* quad offset is limited.  */

/* For each mode, the reload insns to use and the masks of valid addressing
   modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;     /* INSN to reload for loading.  */
  enum insn_code reload_store;    /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;  /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;  /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;  /* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG];  /* Valid address masks.  */
  bool scalar_in_vmx_p;           /* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
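
/* A sketch of how the helpers above are meant to be used (illustrative,
   not code from this file): a check that an auto-increment address is
   allowed for MODE would be written as

     if (GET_CODE (addr) == PRE_INC && !mode_supports_pre_incdec_p (mode))
       return false;

   so the per-mode addr_mask table, rather than the individual insn
   patterns, decides which addressing forms each mode accepts.  */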

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
        {
          out_set = single_set (out_insn);
          if (!out_set)
            {
              out_pat = PATTERN (out_insn);
              if (GET_CODE (out_pat) == PARALLEL)
                {
                  for (i = 0; i < XVECLEN (out_pat, 0); i++)
                    {
                      out_exp = XVECEXP (out_pat, 0, i);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
        return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
        {
          in_exp = XVECEXP (in_pat, 0, i);
          if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
            continue;
          else if (GET_CODE (in_exp) != SET)
            return false;

          if (MEM_P (SET_DEST (in_exp)))
            {
              out_set = single_set (out_insn);
              if (!out_set)
                {
                  out_pat = PATTERN (out_insn);
                  if (GET_CODE (out_pat) != PARALLEL)
                    return false;
                  for (j = 0; j < XVECLEN (out_pat, 0); j++)
                    {
                      out_exp = XVECEXP (out_pat, 0, j);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  return store_data_bypass_p (out_insn, in_insn);
}

\f
/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),  /* mulsi */
  COSTS_N_INSNS (1),  /* mulsi_const */
  COSTS_N_INSNS (1),  /* mulsi_const9 */
  COSTS_N_INSNS (1),  /* muldi */
  COSTS_N_INSNS (1),  /* divsi */
  COSTS_N_INSNS (1),  /* divdi */
  COSTS_N_INSNS (1),  /* fp */
  COSTS_N_INSNS (1),  /* dmul */
  COSTS_N_INSNS (1),  /* sdiv */
  COSTS_N_INSNS (1),  /* ddiv */
  32,                 /* cache line size */
  0,                  /* l1 cache */
  0,                  /* l2 cache */
  0,                  /* streams */
  0,                  /* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),  /* mulsi */
  COSTS_N_INSNS (1),  /* mulsi_const */
  COSTS_N_INSNS (1),  /* mulsi_const9 */
  COSTS_N_INSNS (1),  /* muldi */
  COSTS_N_INSNS (1),  /* divsi */
  COSTS_N_INSNS (1),  /* divdi */
  COSTS_N_INSNS (1),  /* fp */
  COSTS_N_INSNS (1),  /* dmul */
  COSTS_N_INSNS (1),  /* sdiv */
  COSTS_N_INSNS (1),  /* ddiv */
  128,                /* cache line size */
  0,                  /* l1 cache */
  0,                  /* l2 cache */
  0,                  /* streams */
  0,                  /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),  /* mulsi */
  COSTS_N_INSNS (12),  /* mulsi_const */
  COSTS_N_INSNS (8),   /* mulsi_const9 */
  COSTS_N_INSNS (34),  /* muldi */
  COSTS_N_INSNS (65),  /* divsi */
  COSTS_N_INSNS (67),  /* divdi */
  COSTS_N_INSNS (4),   /* fp */
  COSTS_N_INSNS (4),   /* dmul */
  COSTS_N_INSNS (31),  /* sdiv */
  COSTS_N_INSNS (31),  /* ddiv */
  128,                 /* cache line size */
  128,                 /* l1 cache */
  2048,                /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),   /* mulsi */
  COSTS_N_INSNS (2),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (2),   /* muldi */
  COSTS_N_INSNS (6),   /* divsi */
  COSTS_N_INSNS (6),   /* divdi */
  COSTS_N_INSNS (4),   /* fp */
  COSTS_N_INSNS (5),   /* dmul */
  COSTS_N_INSNS (10),  /* sdiv */
  COSTS_N_INSNS (17),  /* ddiv */
  32,                  /* cache line size */
  4,                   /* l1 cache */
  16,                  /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (33),  /* divsi */
  COSTS_N_INSNS (33),  /* divdi */
  COSTS_N_INSNS (11),  /* fp */
  COSTS_N_INSNS (11),  /* dmul */
  COSTS_N_INSNS (11),  /* sdiv */
  COSTS_N_INSNS (11),  /* ddiv */
  32,                  /* cache line size */
  4,                   /* l1 cache */
  16,                  /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (3),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (35),  /* divsi */
  COSTS_N_INSNS (35),  /* divdi */
  COSTS_N_INSNS (11),  /* fp */
  COSTS_N_INSNS (11),  /* dmul */
  COSTS_N_INSNS (11),  /* sdiv */
  COSTS_N_INSNS (11),  /* ddiv */
  32,                  /* cache line size */
  16,                  /* l1 cache */
  128,                 /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),   /* mulsi */
  COSTS_N_INSNS (2),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (3),   /* muldi */
  COSTS_N_INSNS (34),  /* divsi */
  COSTS_N_INSNS (34),  /* divdi */
  COSTS_N_INSNS (5),   /* fp */
  COSTS_N_INSNS (5),   /* dmul */
  COSTS_N_INSNS (19),  /* sdiv */
  COSTS_N_INSNS (33),  /* ddiv */
  32,                  /* cache line size */
  32,                  /* l1 cache */
  256,                 /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (11),  /* divsi */
  COSTS_N_INSNS (11),  /* divdi */
  COSTS_N_INSNS (6),   /* fp */
  COSTS_N_INSNS (6),   /* dmul */
  COSTS_N_INSNS (19),  /* sdiv */
  COSTS_N_INSNS (33),  /* ddiv */
  32,                  /* l1 cache line size */
  32,                  /* l1 cache */
  512,                 /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (5),   /* mulsi_const */
  COSTS_N_INSNS (5),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (36),  /* divsi */
  COSTS_N_INSNS (36),  /* divdi */
  COSTS_N_INSNS (4),   /* fp */
  COSTS_N_INSNS (5),   /* dmul */
  COSTS_N_INSNS (17),  /* sdiv */
  COSTS_N_INSNS (31),  /* ddiv */
  32,                  /* cache line size */
  32,                  /* l1 cache */
  256,                 /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (3),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (37),  /* divsi */
  COSTS_N_INSNS (37),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (4),   /* dmul */
  COSTS_N_INSNS (18),  /* sdiv */
  COSTS_N_INSNS (33),  /* ddiv */
  32,                  /* cache line size */
  8,                   /* l1 cache */
  64,                  /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (20),  /* divsi */
  COSTS_N_INSNS (20),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (18),  /* sdiv */
  COSTS_N_INSNS (32),  /* ddiv */
  32,                  /* cache line size */
  16,                  /* l1 cache */
  512,                 /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),   /* mulsi */
  COSTS_N_INSNS (2),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (2),   /* muldi */
  COSTS_N_INSNS (20),  /* divsi */
  COSTS_N_INSNS (20),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (18),  /* sdiv */
  COSTS_N_INSNS (32),  /* ddiv */
  32,                  /* cache line size */
  32,                  /* l1 cache */
  1024,                /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (3),   /* mulsi_const9 */
  COSTS_N_INSNS (7),   /* muldi */
  COSTS_N_INSNS (21),  /* divsi */
  COSTS_N_INSNS (37),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (18),  /* sdiv */
  COSTS_N_INSNS (32),  /* ddiv */
  128,                 /* cache line size */
  32,                  /* l1 cache */
  1024,                /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (3),   /* mulsi_const9 */
  COSTS_N_INSNS (7),   /* muldi */
  COSTS_N_INSNS (21),  /* divsi */
  COSTS_N_INSNS (37),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (17),  /* sdiv */
  COSTS_N_INSNS (21),  /* ddiv */
  128,                 /* cache line size */
  64,                  /* l1 cache */
  1024,                /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,   /* mulsi */
  COSTS_N_INSNS (6/2),     /* mulsi_const */
  COSTS_N_INSNS (6/2),     /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,  /* muldi */
  COSTS_N_INSNS (38/2),    /* divsi */
  COSTS_N_INSNS (70/2),    /* divdi */
  COSTS_N_INSNS (10/2),    /* fp */
  COSTS_N_INSNS (10/2),    /* dmul */
  COSTS_N_INSNS (74/2),    /* sdiv */
  COSTS_N_INSNS (74/2),    /* ddiv */
  128,                     /* cache line size */
  32,                      /* l1 cache */
  512,                     /* l2 cache */
  6,                       /* streams */
  0,                       /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (3),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (17),  /* divsi */
  COSTS_N_INSNS (17),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (17),  /* sdiv */
  COSTS_N_INSNS (31),  /* ddiv */
  32,                  /* cache line size */
  32,                  /* l1 cache */
  512,                 /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (3),   /* mulsi_const */
  COSTS_N_INSNS (3),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (23),  /* divsi */
  COSTS_N_INSNS (23),  /* divdi */
  COSTS_N_INSNS (5),   /* fp */
  COSTS_N_INSNS (5),   /* dmul */
  COSTS_N_INSNS (21),  /* sdiv */
  COSTS_N_INSNS (35),  /* ddiv */
  32,                  /* cache line size */
  32,                  /* l1 cache */
  1024,                /* l2 cache */
  1,                   /* streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (19),  /* divsi */
  COSTS_N_INSNS (19),  /* divdi */
  COSTS_N_INSNS (4),   /* fp */
  COSTS_N_INSNS (4),   /* dmul */
  COSTS_N_INSNS (29),  /* sdiv */
  COSTS_N_INSNS (29),  /* ddiv */
  32,                  /* cache line size */
  32,                  /* l1 cache */
  256,                 /* l2 cache */
  1,                   /* prefetch streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (19),  /* divsi */
  COSTS_N_INSNS (19),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (4),   /* dmul */
  COSTS_N_INSNS (18),  /* sdiv */
  COSTS_N_INSNS (33),  /* ddiv */
  32,                  /* cache line size */
  16,                  /* l1 cache */
  16,                  /* l2 cache */
  1,                   /* prefetch streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (14),  /* divsi */
  COSTS_N_INSNS (14),  /* divdi */
  COSTS_N_INSNS (8),   /* fp */
  COSTS_N_INSNS (10),  /* dmul */
  COSTS_N_INSNS (36),  /* sdiv */
  COSTS_N_INSNS (66),  /* ddiv */
  64,                  /* cache line size */
  32,                  /* l1 cache */
  128,                 /* l2 cache */
  1,                   /* prefetch streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (14),  /* divsi */
  COSTS_N_INSNS (14),  /* divdi */
  COSTS_N_INSNS (4),   /* fp */
  COSTS_N_INSNS (10),  /* dmul */
  COSTS_N_INSNS (36),  /* sdiv */
  COSTS_N_INSNS (66),  /* ddiv */
  64,                  /* cache line size */
  32,                  /* l1 cache */
  128,                 /* l2 cache */
  1,                   /* prefetch streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (5),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (14),  /* divsi */
  COSTS_N_INSNS (14),  /* divdi */
  COSTS_N_INSNS (7),   /* fp */
  COSTS_N_INSNS (10),  /* dmul */
  COSTS_N_INSNS (36),  /* sdiv */
  COSTS_N_INSNS (66),  /* ddiv */
  64,                  /* cache line size */
  32,                  /* l1 cache */
  128,                 /* l2 cache */
  1,                   /* prefetch streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (5),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (14),  /* divsi */
  COSTS_N_INSNS (14),  /* divdi */
  COSTS_N_INSNS (7),   /* fp */
  COSTS_N_INSNS (10),  /* dmul */
  COSTS_N_INSNS (36),  /* sdiv */
  COSTS_N_INSNS (66),  /* ddiv */
  64,                  /* cache line size */
  32,                  /* l1 cache */
  128,                 /* l2 cache */
  1,                   /* prefetch streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (5),   /* mulsi_const */
  COSTS_N_INSNS (5),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (18),  /* divsi */
  COSTS_N_INSNS (18),  /* divdi */
  COSTS_N_INSNS (10),  /* fp */
  COSTS_N_INSNS (10),  /* dmul */
  COSTS_N_INSNS (46),  /* sdiv */
  COSTS_N_INSNS (72),  /* ddiv */
  32,                  /* cache line size */
  32,                  /* l1 cache */
  512,                 /* l2 cache */
  1,                   /* prefetch streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),   /* mulsi */
  COSTS_N_INSNS (2),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (18),  /* divsi */
  COSTS_N_INSNS (34),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (17),  /* sdiv */
  COSTS_N_INSNS (17),  /* ddiv */
  128,                 /* cache line size */
  32,                  /* l1 cache */
  1024,                /* l2 cache */
  8,                   /* prefetch streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),   /* mulsi */
  COSTS_N_INSNS (8),   /* mulsi_const */
  COSTS_N_INSNS (8),   /* mulsi_const9 */
  COSTS_N_INSNS (8),   /* muldi */
  COSTS_N_INSNS (22),  /* divsi */
  COSTS_N_INSNS (28),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (13),  /* sdiv */
  COSTS_N_INSNS (16),  /* ddiv */
  128,                 /* cache line size */
  64,                  /* l1 cache */
  2048,                /* l2 cache */
  16,                  /* prefetch streams */
  0,                   /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),   /* mulsi */
  COSTS_N_INSNS (2),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (2),   /* muldi */
  COSTS_N_INSNS (18),  /* divsi */
  COSTS_N_INSNS (34),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (13),  /* sdiv */
  COSTS_N_INSNS (16),  /* ddiv */
  128,                 /* cache line size */
  32,                  /* l1 cache */
  256,                 /* l2 cache */
  12,                  /* prefetch streams */
  COSTS_N_INSNS (3),   /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),   /* mulsi */
  COSTS_N_INSNS (3),   /* mulsi_const */
  COSTS_N_INSNS (3),   /* mulsi_const9 */
  COSTS_N_INSNS (3),   /* muldi */
  COSTS_N_INSNS (19),  /* divsi */
  COSTS_N_INSNS (35),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (14),  /* sdiv */
  COSTS_N_INSNS (17),  /* ddiv */
  128,                 /* cache line size */
  32,                  /* l1 cache */
  256,                 /* l2 cache */
  12,                  /* prefetch streams */
  COSTS_N_INSNS (3),   /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),   /* mulsi */
  COSTS_N_INSNS (3),   /* mulsi_const */
  COSTS_N_INSNS (3),   /* mulsi_const9 */
  COSTS_N_INSNS (3),   /* muldi */
  COSTS_N_INSNS (8),   /* divsi */
  COSTS_N_INSNS (12),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (13),  /* sdiv */
  COSTS_N_INSNS (18),  /* ddiv */
  128,                 /* cache line size */
  32,                  /* l1 cache */
  512,                 /* l2 cache */
  8,                   /* prefetch streams */
  COSTS_N_INSNS (3),   /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),  /* mulsi */
  COSTS_N_INSNS (16),  /* mulsi_const */
  COSTS_N_INSNS (16),  /* mulsi_const9 */
  COSTS_N_INSNS (16),  /* muldi */
  COSTS_N_INSNS (22),  /* divsi */
  COSTS_N_INSNS (28),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (59),  /* sdiv */
  COSTS_N_INSNS (72),  /* ddiv */
  64,                  /* cache line size */
  16,                  /* l1 cache */
  2048,                /* l2 cache */
  16,                  /* prefetch streams */
  0,                   /* SF->DF convert */
};

\f
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void htm_init_builtins (void);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);
static rtx rs6000_internal_arg_pointer (void);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static tree rs6000_fold_builtin (tree, int, tree *, bool);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];    /* return value + 3 arguments.  */
  unsigned char uns_p[4];  /* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;

\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",    1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",   0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct",  0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,         0, 0, false, false, false, false, NULL, NULL }
};
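
/* For reference, the "longcall" entry above implements declarations such as

     int far_func (void) __attribute__ ((longcall));

   which force calls to the function to go through a pointer so it can live
   anywhere in the address space (far_func is a hypothetical user function;
   "shortcall" overrides -mlongcall in the other direction).  See the GCC
   documentation on RS/6000 and PowerPC function attributes.  */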
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1693 #undef TARGET_FUNCTION_ARG_ADVANCE
1694 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1695 #undef TARGET_FUNCTION_ARG
1696 #define TARGET_FUNCTION_ARG rs6000_function_arg
1697 #undef TARGET_FUNCTION_ARG_PADDING
1698 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1699 #undef TARGET_FUNCTION_ARG_BOUNDARY
1700 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1701
1702 #undef TARGET_BUILD_BUILTIN_VA_LIST
1703 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1704
1705 #undef TARGET_EXPAND_BUILTIN_VA_START
1706 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1707
1708 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1709 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1710
1711 #undef TARGET_EH_RETURN_FILTER_MODE
1712 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1713
1714 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1715 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1716
1717 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1718 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1719
1720 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1721 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1722
1723 #undef TARGET_FLOATN_MODE
1724 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1725
1726 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1727 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1728
1729 #undef TARGET_MD_ASM_ADJUST
1730 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1731
1732 #undef TARGET_OPTION_OVERRIDE
1733 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1734
1735 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1736 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1737 rs6000_builtin_vectorized_function
1738
1739 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1740 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1741 rs6000_builtin_md_vectorized_function
1742
1743 #undef TARGET_STACK_PROTECT_GUARD
1744 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1745
1746 #if !TARGET_MACHO
1747 #undef TARGET_STACK_PROTECT_FAIL
1748 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1749 #endif
1750
1751 #ifdef HAVE_AS_TLS
1752 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1753 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1754 #endif
1755
1756 /* Use a 32-bit anchor range. This leads to sequences like:
1757
1758 addis tmp,anchor,high
1759 add dest,tmp,low
1760
1761 where tmp itself acts as an anchor, and can be shared between
1762 accesses to the same 64k page. */
1763 #undef TARGET_MIN_ANCHOR_OFFSET
1764 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
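/* Spelled -0x7fffffff - 1 rather than -0x80000000 because 0x80000000 does
   not fit in a 32-bit int and would be an unsigned literal, so negating it
   would not produce a negative value. */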
1765 #undef TARGET_MAX_ANCHOR_OFFSET
1766 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1767 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1768 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1769 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1770 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1771
1772 #undef TARGET_BUILTIN_RECIPROCAL
1773 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1774
1775 #undef TARGET_SECONDARY_RELOAD
1776 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1777 #undef TARGET_SECONDARY_MEMORY_NEEDED
1778 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1779 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1780 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1781
1782 #undef TARGET_LEGITIMATE_ADDRESS_P
1783 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1784
1785 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1786 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1787
1788 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1789 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1790
1791 #undef TARGET_CAN_ELIMINATE
1792 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1793
1794 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1795 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1796
1797 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1798 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1799
1800 #undef TARGET_TRAMPOLINE_INIT
1801 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1802
1803 #undef TARGET_FUNCTION_VALUE
1804 #define TARGET_FUNCTION_VALUE rs6000_function_value
1805
1806 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1807 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1808
1809 #undef TARGET_OPTION_SAVE
1810 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1811
1812 #undef TARGET_OPTION_RESTORE
1813 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1814
1815 #undef TARGET_OPTION_PRINT
1816 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1817
1818 #undef TARGET_CAN_INLINE_P
1819 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1820
1821 #undef TARGET_SET_CURRENT_FUNCTION
1822 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1823
1824 #undef TARGET_LEGITIMATE_CONSTANT_P
1825 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1826
1827 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1828 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1829
1830 #undef TARGET_CAN_USE_DOLOOP_P
1831 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1832
1833 #undef TARGET_PREDICT_DOLOOP_P
1834 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1835
1836 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1837 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1838
1839 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1840 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1841 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1842 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1843 #undef TARGET_UNWIND_WORD_MODE
1844 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1845
1846 #undef TARGET_OFFLOAD_OPTIONS
1847 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1848
1849 #undef TARGET_C_MODE_FOR_SUFFIX
1850 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1851
1852 #undef TARGET_INVALID_BINARY_OP
1853 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1854
1855 #undef TARGET_OPTAB_SUPPORTED_P
1856 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1857
1858 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1859 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1860
1861 #undef TARGET_COMPARE_VERSION_PRIORITY
1862 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1863
1864 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1865 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1866 rs6000_generate_version_dispatcher_body
1867
1868 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1869 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1870 rs6000_get_function_versions_dispatcher
1871
1872 #undef TARGET_OPTION_FUNCTION_VERSIONS
1873 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1874
1875 #undef TARGET_HARD_REGNO_NREGS
1876 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1877 #undef TARGET_HARD_REGNO_MODE_OK
1878 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1879
1880 #undef TARGET_MODES_TIEABLE_P
1881 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1882
1883 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1884 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1885 rs6000_hard_regno_call_part_clobbered
1886
1887 #undef TARGET_SLOW_UNALIGNED_ACCESS
1888 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1889
1890 #undef TARGET_CAN_CHANGE_MODE_CLASS
1891 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1892
1893 #undef TARGET_CONSTANT_ALIGNMENT
1894 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1895
1896 #undef TARGET_STARTING_FRAME_OFFSET
1897 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1898
1899 #if TARGET_ELF && RS6000_WEAK
1900 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1901 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1902 #endif
1903
1904 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1905 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1906
1907 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1908 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1909 \f
1910
1911 /* Processor table. */
1912 struct rs6000_ptt
1913 {
1914 const char *const name; /* Canonical processor name. */
1915 const enum processor_type processor; /* Processor type enum value. */
1916 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1917 };
1918
1919 static struct rs6000_ptt const processor_target_table[] =
1920 {
1921 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1922 #include "rs6000-cpus.def"
1923 #undef RS6000_CPU
1924 };
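/* Each RS6000_CPU (NAME, CPU, FLAGS) line in rs6000-cpus.def expands to a
   { NAME, CPU, FLAGS } initializer above, keeping this table in sync with
   the canonical CPU list. */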
1925
1926 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1927 name is invalid. */
1928
1929 static int
1930 rs6000_cpu_name_lookup (const char *name)
1931 {
1932 size_t i;
1933
1934 if (name != NULL)
1935 {
1936 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1937 if (! strcmp (name, processor_target_table[i].name))
1938 return (int)i;
1939 }
1940
1941 return -1;
1942 }
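/* Illustrative usage (the names come from rs6000-cpus.def):

     rs6000_cpu_name_lookup ("power8") returns the index of the power8
     entry, while rs6000_cpu_name_lookup ("power11x") returns -1.

   The comparison is an exact strcmp, so aliases need their own
   RS6000_CPU entries. */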
1943
1944 \f
1945 /* Return number of consecutive hard regs needed starting at reg REGNO
1946 to hold something of mode MODE.
1947 This is ordinarily the length in words of a value of mode MODE
1948 but can be less for certain modes in special long registers.
1949
1950 POWER and PowerPC GPRs hold 32 bits worth;
1951 PowerPC64 GPRs and FPRs hold 64 bits worth. */
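/* A worked example of the round-up below: DFmode is 8 bytes, so it needs
   (8 + 4 - 1) / 4 == 2 registers in a 32-bit GPR (reg_size == 4), but only
   (8 + 8 - 1) / 8 == 1 register in an FPR (reg_size == 8). */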
1952
1953 static int
1954 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1955 {
1956 unsigned HOST_WIDE_INT reg_size;
1957
1958 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1959 128-bit floating point that can go in vector registers, which has VSX
1960 memory addressing. */
1961 if (FP_REGNO_P (regno))
1962 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1963 ? UNITS_PER_VSX_WORD
1964 : UNITS_PER_FP_WORD);
1965
1966 else if (ALTIVEC_REGNO_P (regno))
1967 reg_size = UNITS_PER_ALTIVEC_WORD;
1968
1969 else
1970 reg_size = UNITS_PER_WORD;
1971
1972 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1973 }
1974
1975 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1976 MODE. */
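/* For example (illustrative): TDmode is a scalar float mode, but the code
   below only accepts it in FPRs when it starts on an even register, via
   the (regno % 2) == 0 test; regno 32 is accepted, regno 33 is not. */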
1977 static int
1978 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1979 {
1980 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1981
1982 if (COMPLEX_MODE_P (mode))
1983 mode = GET_MODE_INNER (mode);
1984
1985 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1986 register combinations, and use PTImode where we need to deal with quad
1987 word memory operations. Don't allow quad words in the argument or frame
1988 pointer registers, just registers 0..31. */
1989 if (mode == PTImode)
1990 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1991 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1992 && ((regno & 1) == 0));
1993
1994 /* VSX registers that overlap the FPR registers are larger than on non-VSX
1995 implementations. Don't allow an item to be split between a FP register
1996 and an Altivec register. Allow TImode in all VSX registers if the user
1997 asked for it. */
1998 if (TARGET_VSX && VSX_REGNO_P (regno)
1999 && (VECTOR_MEM_VSX_P (mode)
2000 || FLOAT128_VECTOR_P (mode)
2001 || reg_addr[mode].scalar_in_vmx_p
2002 || mode == TImode
2003 || (TARGET_VADDUQM && mode == V1TImode)))
2004 {
2005 if (FP_REGNO_P (regno))
2006 return FP_REGNO_P (last_regno);
2007
2008 if (ALTIVEC_REGNO_P (regno))
2009 {
2010 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2011 return 0;
2012
2013 return ALTIVEC_REGNO_P (last_regno);
2014 }
2015 }
2016
2017 /* The GPRs can hold any mode, but values bigger than one register
2018 cannot go past R31. */
2019 if (INT_REGNO_P (regno))
2020 return INT_REGNO_P (last_regno);
2021
2022 /* The float registers (except for VSX vector modes) can only hold floating
2023 modes and DImode. */
2024 if (FP_REGNO_P (regno))
2025 {
2026 if (FLOAT128_VECTOR_P (mode))
2027 return false;
2028
2029 if (SCALAR_FLOAT_MODE_P (mode)
2030 && (mode != TDmode || (regno % 2) == 0)
2031 && FP_REGNO_P (last_regno))
2032 return 1;
2033
2034 if (GET_MODE_CLASS (mode) == MODE_INT)
2035 {
2036 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2037 return 1;
2038
2039 if (TARGET_P8_VECTOR && (mode == SImode))
2040 return 1;
2041
2042 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
2043 return 1;
2044 }
2045
2046 return 0;
2047 }
2048
2049 /* The CR registers can only hold CC modes. */
2050 if (CR_REGNO_P (regno))
2051 return GET_MODE_CLASS (mode) == MODE_CC;
2052
2053 if (CA_REGNO_P (regno))
2054 return mode == Pmode || mode == SImode;
2055
2056 /* AltiVec modes can go only in AltiVec registers. */
2057 if (ALTIVEC_REGNO_P (regno))
2058 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2059 || mode == V1TImode);
2060
2061 /* We cannot put non-VSX TImode or PTImode anywhere except the general
2062 registers, and the value must fit within the register set. */
2063
2064 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2065 }
2066
2067 /* Implement TARGET_HARD_REGNO_NREGS. */
2068
2069 static unsigned int
2070 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2071 {
2072 return rs6000_hard_regno_nregs[mode][regno];
2073 }
2074
2075 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2076
2077 static bool
2078 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2079 {
2080 return rs6000_hard_regno_mode_ok_p[mode][regno];
2081 }
2082
2083 /* Implement TARGET_MODES_TIEABLE_P.
2084
2085 PTImode cannot tie with other modes because PTImode is restricted to even
2086 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2087 57744).
2088
2089 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2090 128-bit floating point on VSX systems ties with other vectors. */
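/* Some illustrative consequences of the ordering below: V4SImode and
   V2DFmode tie (both are Altivec/VSX vector modes) and SFmode ties with
   DFmode (both scalar float), but DFmode does not tie with V2DFmode, and
   TImode never ties with PTImode. */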
2091
2092 static bool
2093 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2094 {
2095 if (mode1 == PTImode)
2096 return mode2 == PTImode;
2097 if (mode2 == PTImode)
2098 return false;
2099
2100 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2101 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2102 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2103 return false;
2104
2105 if (SCALAR_FLOAT_MODE_P (mode1))
2106 return SCALAR_FLOAT_MODE_P (mode2);
2107 if (SCALAR_FLOAT_MODE_P (mode2))
2108 return false;
2109
2110 if (GET_MODE_CLASS (mode1) == MODE_CC)
2111 return GET_MODE_CLASS (mode2) == MODE_CC;
2112 if (GET_MODE_CLASS (mode2) == MODE_CC)
2113 return false;
2114
2115 return true;
2116 }
2117
2118 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2119
2120 static bool
2121 rs6000_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED,
2122 unsigned int regno, machine_mode mode)
2123 {
2124 if (TARGET_32BIT
2125 && TARGET_POWERPC64
2126 && GET_MODE_SIZE (mode) > 4
2127 && INT_REGNO_P (regno))
2128 return true;
2129
2130 if (TARGET_VSX
2131 && FP_REGNO_P (regno)
2132 && GET_MODE_SIZE (mode) > 8
2133 && !FLOAT128_2REG_P (mode))
2134 return true;
2135
2136 return false;
2137 }
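/* Example of the first test above (illustrative): with -m32 -mpowerpc64, a
   DImode value fits in a single 64-bit GPR, but the 32-bit ABI only
   preserves the low 32 bits of nonvolatile GPRs across calls, so the
   register must be treated as partially clobbered. */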
2138
2139 /* Print interesting facts about registers. */
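/* The output is one entry per register (wrapped at roughly 70 columns),
   along the lines of (illustrative):

     gr0:	QI, HI, SI, DI/2, ..., call-used, reg-class = GENERAL_REGS, regno = 0

   where MODE/N means the mode needs N consecutive registers. */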
2140 static void
2141 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2142 {
2143 int r, m;
2144
2145 for (r = first_regno; r <= last_regno; ++r)
2146 {
2147 const char *comma = "";
2148 int len;
2149
2150 if (first_regno == last_regno)
2151 fprintf (stderr, "%s:\t", reg_name);
2152 else
2153 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2154
2155 len = 8;
2156 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2157 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2158 {
2159 if (len > 70)
2160 {
2161 fprintf (stderr, ",\n\t");
2162 len = 8;
2163 comma = "";
2164 }
2165
2166 if (rs6000_hard_regno_nregs[m][r] > 1)
2167 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2168 rs6000_hard_regno_nregs[m][r]);
2169 else
2170 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2171
2172 comma = ", ";
2173 }
2174
2175 if (call_used_regs[r])
2176 {
2177 if (len > 70)
2178 {
2179 fprintf (stderr, ",\n\t");
2180 len = 8;
2181 comma = "";
2182 }
2183
2184 len += fprintf (stderr, "%s%s", comma, "call-used");
2185 comma = ", ";
2186 }
2187
2188 if (fixed_regs[r])
2189 {
2190 if (len > 70)
2191 {
2192 fprintf (stderr, ",\n\t");
2193 len = 8;
2194 comma = "";
2195 }
2196
2197 len += fprintf (stderr, "%s%s", comma, "fixed");
2198 comma = ", ";
2199 }
2200
2201 if (len > 70)
2202 {
2203 fprintf (stderr, ",\n\t");
2204 comma = "";
2205 }
2206
2207 len += fprintf (stderr, "%sreg-class = %s", comma,
2208 reg_class_names[(int)rs6000_regno_regclass[r]]);
2209 comma = ", ";
2210
2211 if (len > 70)
2212 {
2213 fprintf (stderr, ",\n\t");
2214 comma = "";
2215 }
2216
2217 fprintf (stderr, "%sregno = %d\n", comma, r);
2218 }
2219 }
2220
2221 static const char *
2222 rs6000_debug_vector_unit (enum rs6000_vector v)
2223 {
2224 const char *ret;
2225
2226 switch (v)
2227 {
2228 case VECTOR_NONE: ret = "none"; break;
2229 case VECTOR_ALTIVEC: ret = "altivec"; break;
2230 case VECTOR_VSX: ret = "vsx"; break;
2231 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2232 default: ret = "unknown"; break;
2233 }
2234
2235 return ret;
2236 }
2237
2238 /* Inner function printing just the address mask for a particular reload
2239 register class. */
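/* The columns are, in order: v (valid), m (multiple registers), i
   (indexed), O/o (quad/normal offset), + (pre-increment/decrement), +
   (pre-modify) and & (AND of the address with -16). For example, a mask
   with just RELOAD_REG_VALID, RELOAD_REG_INDEXED and RELOAD_REG_OFFSET
   comes back as "v io   " when KEEP_SPACES is set (illustrative). */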
2240 DEBUG_FUNCTION char *
2241 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2242 {
2243 static char ret[8];
2244 char *p = ret;
2245
2246 if ((mask & RELOAD_REG_VALID) != 0)
2247 *p++ = 'v';
2248 else if (keep_spaces)
2249 *p++ = ' ';
2250
2251 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2252 *p++ = 'm';
2253 else if (keep_spaces)
2254 *p++ = ' ';
2255
2256 if ((mask & RELOAD_REG_INDEXED) != 0)
2257 *p++ = 'i';
2258 else if (keep_spaces)
2259 *p++ = ' ';
2260
2261 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2262 *p++ = 'O';
2263 else if ((mask & RELOAD_REG_OFFSET) != 0)
2264 *p++ = 'o';
2265 else if (keep_spaces)
2266 *p++ = ' ';
2267
2268 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2269 *p++ = '+';
2270 else if (keep_spaces)
2271 *p++ = ' ';
2272
2273 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2274 *p++ = '+';
2275 else if (keep_spaces)
2276 *p++ = ' ';
2277
2278 if ((mask & RELOAD_REG_AND_M16) != 0)
2279 *p++ = '&';
2280 else if (keep_spaces)
2281 *p++ = ' ';
2282
2283 *p = '\0';
2284
2285 return ret;
2286 }
2287
2288 /* Print the address masks in a human readable fashion. */
2289 DEBUG_FUNCTION void
2290 rs6000_debug_print_mode (ssize_t m)
2291 {
2292 ssize_t rc;
2293 int spaces = 0;
2294
2295 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2296 for (rc = 0; rc < N_RELOAD_REG; rc++)
2297 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2298 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2299
2300 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2301 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2302 {
2303 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2304 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2305 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2306 spaces = 0;
2307 }
2308 else
2309 spaces += sizeof (" Reload=sl") - 1;
2310
2311 if (reg_addr[m].scalar_in_vmx_p)
2312 {
2313 fprintf (stderr, "%*s Upper=y", spaces, "");
2314 spaces = 0;
2315 }
2316 else
2317 spaces += sizeof (" Upper=y") - 1;
2318
2319 if (rs6000_vector_unit[m] != VECTOR_NONE
2320 || rs6000_vector_mem[m] != VECTOR_NONE)
2321 {
2322 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2323 spaces, "",
2324 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2325 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2326 }
2327
2328 fputs ("\n", stderr);
2329 }
2330
2331 #define DEBUG_FMT_ID "%-32s= "
2332 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2333 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2334 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
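/* For example, fprintf (stderr, DEBUG_FMT_D, "tls_size", 16) prints
   "tls_size" left-justified in a 32-column field, then "= 16". */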
2335
2336 /* Print various interesting information with -mdebug=reg. */
2337 static void
2338 rs6000_debug_reg_global (void)
2339 {
2340 static const char *const tf[2] = { "false", "true" };
2341 const char *nl = (const char *)0;
2342 int m;
2343 size_t m1, m2, v;
2344 char costly_num[20];
2345 char nop_num[20];
2346 char flags_buffer[40];
2347 const char *costly_str;
2348 const char *nop_str;
2349 const char *trace_str;
2350 const char *abi_str;
2351 const char *cmodel_str;
2352 struct cl_target_option cl_opts;
2353
2354 /* Modes we want tieable information on. */
2355 static const machine_mode print_tieable_modes[] = {
2356 QImode,
2357 HImode,
2358 SImode,
2359 DImode,
2360 TImode,
2361 PTImode,
2362 SFmode,
2363 DFmode,
2364 TFmode,
2365 IFmode,
2366 KFmode,
2367 SDmode,
2368 DDmode,
2369 TDmode,
2370 V16QImode,
2371 V8HImode,
2372 V4SImode,
2373 V2DImode,
2374 V1TImode,
2375 V32QImode,
2376 V16HImode,
2377 V8SImode,
2378 V4DImode,
2379 V2TImode,
2380 V4SFmode,
2381 V2DFmode,
2382 V8SFmode,
2383 V4DFmode,
2384 CCmode,
2385 CCUNSmode,
2386 CCEQmode,
2387 };
2388
2389 /* Virtual regs we are interested in. */
2390 static const struct {
2391 int regno; /* register number. */
2392 const char *name; /* register name. */
2393 } virtual_regs[] = {
2394 { STACK_POINTER_REGNUM, "stack pointer:" },
2395 { TOC_REGNUM, "toc: " },
2396 { STATIC_CHAIN_REGNUM, "static chain: " },
2397 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2398 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2399 { ARG_POINTER_REGNUM, "arg pointer: " },
2400 { FRAME_POINTER_REGNUM, "frame pointer:" },
2401 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2402 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2403 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2404 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2405 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2406 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2407 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2408 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2409 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2410 };
2411
2412 fputs ("\nHard register information:\n", stderr);
2413 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2414 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2415 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2416 LAST_ALTIVEC_REGNO,
2417 "vs");
2418 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2419 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2420 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2421 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2422 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2423 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2424
2425 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2426 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2427 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2428
2429 fprintf (stderr,
2430 "\n"
2431 "d reg_class = %s\n"
2432 "f reg_class = %s\n"
2433 "v reg_class = %s\n"
2434 "wa reg_class = %s\n"
2435 "we reg_class = %s\n"
2436 "wr reg_class = %s\n"
2437 "wx reg_class = %s\n"
2438 "wA reg_class = %s\n"
2439 "\n",
2440 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2441 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2442 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2443 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2444 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2445 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2446 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2447 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2448
2449 nl = "\n";
2450 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2451 rs6000_debug_print_mode (m);
2452
2453 fputs ("\n", stderr);
2454
2455 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2456 {
2457 machine_mode mode1 = print_tieable_modes[m1];
2458 bool first_time = true;
2459
2460 nl = (const char *)0;
2461 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2462 {
2463 machine_mode mode2 = print_tieable_modes[m2];
2464 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2465 {
2466 if (first_time)
2467 {
2468 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2469 nl = "\n";
2470 first_time = false;
2471 }
2472
2473 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2474 }
2475 }
2476
2477 if (!first_time)
2478 fputs ("\n", stderr);
2479 }
2480
2481 if (nl)
2482 fputs (nl, stderr);
2483
2484 if (rs6000_recip_control)
2485 {
2486 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2487
2488 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2489 if (rs6000_recip_bits[m])
2490 {
2491 fprintf (stderr,
2492 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2493 GET_MODE_NAME (m),
2494 (RS6000_RECIP_AUTO_RE_P (m)
2495 ? "auto"
2496 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2497 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2498 ? "auto"
2499 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2500 }
2501
2502 fputs ("\n", stderr);
2503 }
2504
2505 if (rs6000_cpu_index >= 0)
2506 {
2507 const char *name = processor_target_table[rs6000_cpu_index].name;
2508 HOST_WIDE_INT flags
2509 = processor_target_table[rs6000_cpu_index].target_enable;
2510
2511 sprintf (flags_buffer, "-mcpu=%s flags", name);
2512 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2513 }
2514 else
2515 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2516
2517 if (rs6000_tune_index >= 0)
2518 {
2519 const char *name = processor_target_table[rs6000_tune_index].name;
2520 HOST_WIDE_INT flags
2521 = processor_target_table[rs6000_tune_index].target_enable;
2522
2523 sprintf (flags_buffer, "-mtune=%s flags", name);
2524 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2525 }
2526 else
2527 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2528
2529 cl_target_option_save (&cl_opts, &global_options);
2530 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2531 rs6000_isa_flags);
2532
2533 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2534 rs6000_isa_flags_explicit);
2535
2536 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2537 rs6000_builtin_mask);
2538
2539 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2540
2541 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2542 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2543
2544 switch (rs6000_sched_costly_dep)
2545 {
2546 case max_dep_latency:
2547 costly_str = "max_dep_latency";
2548 break;
2549
2550 case no_dep_costly:
2551 costly_str = "no_dep_costly";
2552 break;
2553
2554 case all_deps_costly:
2555 costly_str = "all_deps_costly";
2556 break;
2557
2558 case true_store_to_load_dep_costly:
2559 costly_str = "true_store_to_load_dep_costly";
2560 break;
2561
2562 case store_to_load_dep_costly:
2563 costly_str = "store_to_load_dep_costly";
2564 break;
2565
2566 default:
2567 costly_str = costly_num;
2568 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2569 break;
2570 }
2571
2572 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2573
2574 switch (rs6000_sched_insert_nops)
2575 {
2576 case sched_finish_regroup_exact:
2577 nop_str = "sched_finish_regroup_exact";
2578 break;
2579
2580 case sched_finish_pad_groups:
2581 nop_str = "sched_finish_pad_groups";
2582 break;
2583
2584 case sched_finish_none:
2585 nop_str = "sched_finish_none";
2586 break;
2587
2588 default:
2589 nop_str = nop_num;
2590 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2591 break;
2592 }
2593
2594 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2595
2596 switch (rs6000_sdata)
2597 {
2598 default:
2599 case SDATA_NONE:
2600 break;
2601
2602 case SDATA_DATA:
2603 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2604 break;
2605
2606 case SDATA_SYSV:
2607 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2608 break;
2609
2610 case SDATA_EABI:
2611 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2612 break;
2613
2614 }
2615
2616 switch (rs6000_traceback)
2617 {
2618 case traceback_default: trace_str = "default"; break;
2619 case traceback_none: trace_str = "none"; break;
2620 case traceback_part: trace_str = "part"; break;
2621 case traceback_full: trace_str = "full"; break;
2622 default: trace_str = "unknown"; break;
2623 }
2624
2625 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2626
2627 switch (rs6000_current_cmodel)
2628 {
2629 case CMODEL_SMALL: cmodel_str = "small"; break;
2630 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2631 case CMODEL_LARGE: cmodel_str = "large"; break;
2632 default: cmodel_str = "unknown"; break;
2633 }
2634
2635 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2636
2637 switch (rs6000_current_abi)
2638 {
2639 case ABI_NONE: abi_str = "none"; break;
2640 case ABI_AIX: abi_str = "aix"; break;
2641 case ABI_ELFv2: abi_str = "ELFv2"; break;
2642 case ABI_V4: abi_str = "V4"; break;
2643 case ABI_DARWIN: abi_str = "darwin"; break;
2644 default: abi_str = "unknown"; break;
2645 }
2646
2647 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2648
2649 if (rs6000_altivec_abi)
2650 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2651
2652 if (rs6000_darwin64_abi)
2653 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2654
2655 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2656 (TARGET_SOFT_FLOAT ? "true" : "false"));
2657
2658 if (TARGET_LINK_STACK)
2659 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2660
2661 if (TARGET_P8_FUSION)
2662 {
2663 char options[80];
2664
2665 strcpy (options, "power8");
2666 if (TARGET_P8_FUSION_SIGN)
2667 strcat (options, ", sign");
2668
2669 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2670 }
2671
2672 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2673 TARGET_SECURE_PLT ? "secure" : "bss");
2674 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2675 aix_struct_return ? "aix" : "sysv");
2676 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2677 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2678 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2679 tf[!!rs6000_align_branch_targets]);
2680 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2681 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2682 rs6000_long_double_type_size);
2683 if (rs6000_long_double_type_size > 64)
2684 {
2685 fprintf (stderr, DEBUG_FMT_S, "long double type",
2686 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2687 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2688 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2689 }
2690 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2691 (int)rs6000_sched_restricted_insns_priority);
2692 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2693 (int)END_BUILTINS);
2694 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2695 (int)RS6000_BUILTIN_COUNT);
2696
2697 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2698 (int)TARGET_FLOAT128_ENABLE_TYPE);
2699
2700 if (TARGET_VSX)
2701 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2702 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2703
2704 if (TARGET_DIRECT_MOVE_128)
2705 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2706 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2707 }
2708
2709 \f
2710 /* Update the addr mask bits in reg_addr to help secondary reload and the
2711 legitimate address support figure out the appropriate addressing to
2712 use. */
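/* As a concrete (illustrative) data point: on a 64-bit VSX target, DFmode
   in a GPR ends up with RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET; the update (pre-increment/pre-modify) forms are
   deliberately left out because of the IV-OPT interaction described in the
   function body. */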
2713
2714 static void
2715 rs6000_setup_reg_addr_masks (void)
2716 {
2717 ssize_t rc, reg, m, nregs;
2718 addr_mask_type any_addr_mask, addr_mask;
2719
2720 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2721 {
2722 machine_mode m2 = (machine_mode) m;
2723 bool complex_p = false;
2724 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2725 size_t msize;
2726
2727 if (COMPLEX_MODE_P (m2))
2728 {
2729 complex_p = true;
2730 m2 = GET_MODE_INNER (m2);
2731 }
2732
2733 msize = GET_MODE_SIZE (m2);
2734
2735 /* SDmode is special in that we want to access it only via REG+REG
2736 addressing on power7 and above, since we want to use the LFIWZX and
2737 STFIWX instructions to load and store it. */
2738 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2739
2740 any_addr_mask = 0;
2741 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2742 {
2743 addr_mask = 0;
2744 reg = reload_reg_map[rc].reg;
2745
2746 /* Can mode values go in the GPR/FPR/Altivec registers? */
2747 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2748 {
2749 bool small_int_vsx_p = (small_int_p
2750 && (rc == RELOAD_REG_FPR
2751 || rc == RELOAD_REG_VMX));
2752
2753 nregs = rs6000_hard_regno_nregs[m][reg];
2754 addr_mask |= RELOAD_REG_VALID;
2755
2756 /* Indicate if the mode takes more than 1 physical register. If
2757 it takes a single register, indicate it can do REG+REG
2758 addressing. Small integers in VSX registers can only do
2759 REG+REG addressing. */
2760 if (small_int_vsx_p)
2761 addr_mask |= RELOAD_REG_INDEXED;
2762 else if (nregs > 1 || m == BLKmode || complex_p)
2763 addr_mask |= RELOAD_REG_MULTIPLE;
2764 else
2765 addr_mask |= RELOAD_REG_INDEXED;
2766
2767 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2768 addressing. If we allow scalars into Altivec registers,
2769 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2770
2771 For VSX systems, we don't allow update addressing for
2772 DFmode/SFmode if those registers can go in both the
2773 traditional floating point registers and Altivec registers.
2774 The load/store instructions for the Altivec registers do not
2775 have update forms. If we allowed update addressing, it seems
2776 to break IV-OPT code using floating point if the index type is
2777 int instead of long (PR target/81550 and target/84042). */
2778
2779 if (TARGET_UPDATE
2780 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2781 && msize <= 8
2782 && !VECTOR_MODE_P (m2)
2783 && !FLOAT128_VECTOR_P (m2)
2784 && !complex_p
2785 && (m != E_DFmode || !TARGET_VSX)
2786 && (m != E_SFmode || !TARGET_P8_VECTOR)
2787 && !small_int_vsx_p)
2788 {
2789 addr_mask |= RELOAD_REG_PRE_INCDEC;
2790
2791 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2792 we don't allow PRE_MODIFY for some multi-register
2793 operations. */
2794 switch (m)
2795 {
2796 default:
2797 addr_mask |= RELOAD_REG_PRE_MODIFY;
2798 break;
2799
2800 case E_DImode:
2801 if (TARGET_POWERPC64)
2802 addr_mask |= RELOAD_REG_PRE_MODIFY;
2803 break;
2804
2805 case E_DFmode:
2806 case E_DDmode:
2807 if (TARGET_HARD_FLOAT)
2808 addr_mask |= RELOAD_REG_PRE_MODIFY;
2809 break;
2810 }
2811 }
2812 }
2813
2814 /* GPR and FPR registers can do REG+OFFSET addressing, except
2815 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2816 for 64-bit scalars and 32-bit SFmode to Altivec registers. */
2817 if ((addr_mask != 0) && !indexed_only_p
2818 && msize <= 8
2819 && (rc == RELOAD_REG_GPR
2820 || ((msize == 8 || m2 == SFmode)
2821 && (rc == RELOAD_REG_FPR
2822 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2823 addr_mask |= RELOAD_REG_OFFSET;
2824
2825 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2826 instructions are enabled. The offset for 128-bit VSX registers is
2827 only 12 bits. While GPRs can handle the full offset range, VSX
2828 registers can only handle the restricted range. */
2829 else if ((addr_mask != 0) && !indexed_only_p
2830 && msize == 16 && TARGET_P9_VECTOR
2831 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2832 || (m2 == TImode && TARGET_VSX)))
2833 {
2834 addr_mask |= RELOAD_REG_OFFSET;
2835 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2836 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2837 }
2838
2839 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2840 addressing on 128-bit types. */
2841 if (rc == RELOAD_REG_VMX && msize == 16
2842 && (addr_mask & RELOAD_REG_VALID) != 0)
2843 addr_mask |= RELOAD_REG_AND_M16;
2844
2845 reg_addr[m].addr_mask[rc] = addr_mask;
2846 any_addr_mask |= addr_mask;
2847 }
2848
2849 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2850 }
2851 }
2852
2853 \f
2854 /* Initialize the various global tables that are based on register size. */
2855 static void
2856 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2857 {
2858 ssize_t r, m, c;
2859 int align64;
2860 int align32;
2861
2862 /* Precalculate REGNO_REG_CLASS. */
2863 rs6000_regno_regclass[0] = GENERAL_REGS;
2864 for (r = 1; r < 32; ++r)
2865 rs6000_regno_regclass[r] = BASE_REGS;
2866
2867 for (r = 32; r < 64; ++r)
2868 rs6000_regno_regclass[r] = FLOAT_REGS;
2869
2870 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2871 rs6000_regno_regclass[r] = NO_REGS;
2872
2873 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2874 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2875
2876 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2877 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2878 rs6000_regno_regclass[r] = CR_REGS;
2879
2880 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2881 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2882 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2883 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2884 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2885 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2886 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2887
2888 /* Precalculate register class to simpler reload register class. We don't
2889 need all of the register classes that are combinations of different
2890 classes, just the simple ones that have constraint letters. */
2891 for (c = 0; c < N_REG_CLASSES; c++)
2892 reg_class_to_reg_type[c] = NO_REG_TYPE;
2893
2894 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2895 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2896 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2897 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2898 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2899 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2900 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2901 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2902 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2903 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2904
2905 if (TARGET_VSX)
2906 {
2907 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2908 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2909 }
2910 else
2911 {
2912 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2913 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2914 }
2915
2916 /* Precalculate the valid memory formats as well as the vector information;
2917 this must be set up before the rs6000_hard_regno_nregs_internal calls
2918 below. */
2919 gcc_assert ((int)VECTOR_NONE == 0);
2920 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2921 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2922
2923 gcc_assert ((int)CODE_FOR_nothing == 0);
2924 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2925
2926 gcc_assert ((int)NO_REGS == 0);
2927 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2928
2929 /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128
2930 controls whether the compiler uses native alignment or 128-bit alignment. */
2931 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2932 {
2933 align64 = 64;
2934 align32 = 32;
2935 }
2936 else
2937 {
2938 align64 = 128;
2939 align32 = 128;
2940 }
2941
2942 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2943 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2944 if (TARGET_FLOAT128_TYPE)
2945 {
2946 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2947 rs6000_vector_align[KFmode] = 128;
2948
2949 if (FLOAT128_IEEE_P (TFmode))
2950 {
2951 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2952 rs6000_vector_align[TFmode] = 128;
2953 }
2954 }
2955
2956 /* V2DF mode, VSX only. */
2957 if (TARGET_VSX)
2958 {
2959 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2960 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2961 rs6000_vector_align[V2DFmode] = align64;
2962 }
2963
2964 /* V4SF mode, either VSX or Altivec. */
2965 if (TARGET_VSX)
2966 {
2967 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2968 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2969 rs6000_vector_align[V4SFmode] = align32;
2970 }
2971 else if (TARGET_ALTIVEC)
2972 {
2973 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2974 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2975 rs6000_vector_align[V4SFmode] = align32;
2976 }
2977
2978 /* V16QImode, V8HImode and V4SImode arithmetic is Altivec only, but the modes
2979 may use VSX loads and stores. */
2980 if (TARGET_ALTIVEC)
2981 {
2982 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2983 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2984 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2985 rs6000_vector_align[V4SImode] = align32;
2986 rs6000_vector_align[V8HImode] = align32;
2987 rs6000_vector_align[V16QImode] = align32;
2988
2989 if (TARGET_VSX)
2990 {
2991 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2992 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2993 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2994 }
2995 else
2996 {
2997 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2998 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2999 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3000 }
3001 }
3002
3003 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector unit.
3004 Allow it under VSX for insert/splat/extract; Altivec has no 64-bit integers. */
3005 if (TARGET_VSX)
3006 {
3007 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3008 rs6000_vector_unit[V2DImode]
3009 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3010 rs6000_vector_align[V2DImode] = align64;
3011
3012 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3013 rs6000_vector_unit[V1TImode]
3014 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3015 rs6000_vector_align[V1TImode] = 128;
3016 }
3017
3018 /* DFmode, see if we want to use the VSX unit. Memory is handled
3019 differently, so don't set rs6000_vector_mem. */
3020 if (TARGET_VSX)
3021 {
3022 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3023 rs6000_vector_align[DFmode] = 64;
3024 }
3025
3026 /* SFmode, see if we want to use the VSX unit. */
3027 if (TARGET_P8_VECTOR)
3028 {
3029 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3030 rs6000_vector_align[SFmode] = 32;
3031 }
3032
3033 /* Allow TImode in VSX registers and set the VSX memory macros. */
3034 if (TARGET_VSX)
3035 {
3036 rs6000_vector_mem[TImode] = VECTOR_VSX;
3037 rs6000_vector_align[TImode] = align64;
3038 }
3039
3040 /* Register class constraints for the constraints that depend on compile
3041 switches. When the VSX code was added, different constraints were added
3042 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3043 of the VSX registers are used. The register classes for scalar floating
3044 point types are set, based on whether we allow that type into the upper
3045 (Altivec) registers. GCC has register classes to target the Altivec
3046 registers for load/store operations, to select using a VSX memory
3047 operation instead of the traditional floating point operation. The
3048 constraints are:
3049
3050 d - Register class to use with traditional DFmode instructions.
3051 f - Register class to use with traditional SFmode instructions.
3052 v - Altivec register.
3053 wa - Any VSX register.
3054 wc - Reserved to represent individual CR bits (used in LLVM).
3055 wn - always NO_REGS.
3056 wr - GPR if 64-bit mode is permitted.
3057 wx - Float register if we can do 32-bit int stores. */
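/* For example (illustrative): because rs6000_constraints[] was zeroed to
   NO_REGS above, an insn operand constrained with "wa" accepts any VSX
   register when TARGET_VSX is set and matches no register otherwise,
   letting a single machine-description pattern serve several ISA levels. */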
3058
3059 if (TARGET_HARD_FLOAT)
3060 {
3061 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3062 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3063 }
3064
3065 if (TARGET_VSX)
3066 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3067
3068 /* Add conditional constraints based on various options, to allow us to
3069 collapse multiple insn patterns. */
3070 if (TARGET_ALTIVEC)
3071 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3072
3073 if (TARGET_POWERPC64)
3074 {
3075 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3076 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3077 }
3078
3079 if (TARGET_STFIWX)
3080 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3081
3082 /* Support for new direct moves (ISA 3.0 + 64-bit). */
3083 if (TARGET_DIRECT_MOVE_128)
3084 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3085
3086 /* Set up the reload helper and direct move functions. */
3087 if (TARGET_VSX || TARGET_ALTIVEC)
3088 {
3089 if (TARGET_64BIT)
3090 {
3091 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3092 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3093 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3094 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3095 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3096 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3097 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3098 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3099 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3100 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3101 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3102 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3103 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3104 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3105 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3106 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3107 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3108 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3109 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3110 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3111
3112 if (FLOAT128_VECTOR_P (KFmode))
3113 {
3114 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3115 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3116 }
3117
3118 if (FLOAT128_VECTOR_P (TFmode))
3119 {
3120 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3121 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3122 }
3123
3124 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3125 available. */
3126 if (TARGET_NO_SDMODE_STACK)
3127 {
3128 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3129 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3130 }
3131
3132 if (TARGET_VSX)
3133 {
3134 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3135 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3136 }
3137
3138 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3139 {
3140 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3141 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3142 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3143 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3144 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3145 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3146 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3147 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3148 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3149
3150 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3151 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3152 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3153 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3154 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3155 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3156 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3157 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3158 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3159
3160 if (FLOAT128_VECTOR_P (KFmode))
3161 {
3162 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3163 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3164 }
3165
3166 if (FLOAT128_VECTOR_P (TFmode))
3167 {
3168 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3169 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3170 }
3171 }
3172 }
3173 else
3174 {
3175 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3176 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3177 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3178 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3179 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3180 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3181 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3182 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3183 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3184 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3185 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3186 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3187 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3188 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3189 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3190 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3191 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3192 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3193 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3194 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3195
3196 if (FLOAT128_VECTOR_P (KFmode))
3197 {
3198 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3199 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3200 }
3201
3202 if (FLOAT128_IEEE_P (TFmode))
3203 {
3204 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3205 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3206 }
3207
3208 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3209 available. */
3210 if (TARGET_NO_SDMODE_STACK)
3211 {
3212 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3213 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3214 }
3215
3216 if (TARGET_VSX)
3217 {
3218 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3219 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3220 }
3221
3222 if (TARGET_DIRECT_MOVE)
3223 {
3224 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3225 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3226 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3227 }
3228 }
3229
3230 reg_addr[DFmode].scalar_in_vmx_p = true;
3231 reg_addr[DImode].scalar_in_vmx_p = true;
3232
3233 if (TARGET_P8_VECTOR)
3234 {
3235 reg_addr[SFmode].scalar_in_vmx_p = true;
3236 reg_addr[SImode].scalar_in_vmx_p = true;
3237
3238 if (TARGET_P9_VECTOR)
3239 {
3240 reg_addr[HImode].scalar_in_vmx_p = true;
3241 reg_addr[QImode].scalar_in_vmx_p = true;
3242 }
3243 }
3244 }
3245
3246 /* Precalculate HARD_REGNO_NREGS. */
3247 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3248 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3249 rs6000_hard_regno_nregs[m][r]
3250 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3251
3252 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3253 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3254 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3255 rs6000_hard_regno_mode_ok_p[m][r]
3256 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3257
3258 /* Precalculate CLASS_MAX_NREGS sizes. */
3259 for (c = 0; c < LIM_REG_CLASSES; ++c)
3260 {
3261 int reg_size;
3262
3263 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3264 reg_size = UNITS_PER_VSX_WORD;
3265
3266 else if (c == ALTIVEC_REGS)
3267 reg_size = UNITS_PER_ALTIVEC_WORD;
3268
3269 else if (c == FLOAT_REGS)
3270 reg_size = UNITS_PER_FP_WORD;
3271
3272 else
3273 reg_size = UNITS_PER_WORD;
3274
3275 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3276 {
3277 machine_mode m2 = (machine_mode)m;
3278 int reg_size2 = reg_size;
3279
3280 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3281 in VSX. */
3282 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3283 reg_size2 = UNITS_PER_FP_WORD;
3284
3285 rs6000_class_max_nregs[m][c]
3286 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3287 }
3288 }
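/* Worked example of the loop above (illustrative): IFmode (IBM 128-bit
   floating point) is 16 bytes and FLOAT128_2REG_P, so even for a VSX
   register class it uses reg_size2 == UNITS_PER_FP_WORD == 8 and gets
   (16 + 8 - 1) / 8 == 2 registers. */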
3289
3290 /* Calculate which modes to automatically generate code to use the
3291 reciprocal divide and square root instructions. In the future, possibly
3292 automatically generate the instructions even if the user did not specify
3293 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3294 not accurate enough. */
3295 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3296 if (TARGET_FRES)
3297 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3298 if (TARGET_FRE)
3299 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3300 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3301 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3302 if (VECTOR_UNIT_VSX_P (V2DFmode))
3303 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3304
3305 if (TARGET_FRSQRTES)
3306 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3307 if (TARGET_FRSQRTE)
3308 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3309 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3310 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3311 if (VECTOR_UNIT_VSX_P (V2DFmode))
3312 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3313
3314 if (rs6000_recip_control)
3315 {
3316 if (!flag_finite_math_only)
3317 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math-only",
3318 "-ffast-math");
3319 if (flag_trapping_math)
3320 warning (0, "%qs requires %qs or %qs", "-mrecip",
3321 "-fno-trapping-math", "-ffast-math");
3322 if (!flag_reciprocal_math)
3323 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3324 "-ffast-math");
3325 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3326 {
3327 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3328 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3329 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3330
3331 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3332 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3333 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3334
3335 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3336 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3337 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3338
3339 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3340 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3341 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3342
3343 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3344 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3345 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3346
3347 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3348 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3349 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3350
3351 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3352 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3353 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3354
3355 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3356 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3357 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3358 }
3359 }
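/* Example (hypothetical command line, not in the original source):

     gcc -O2 -mcpu=power7 -ffast-math -mrecip=div,rsqrt

   avoids all three warnings above, since -ffast-math implies
   -ffinite-math-only, -fno-trapping-math and -freciprocal-math; the
   HAVE_* bits set earlier then gain the matching AUTO_* bits, and
   a/b may later be expanded as a * (1/b) using the fre/fres estimate
   plus Newton-Raphson refinement instead of a divide instruction.  */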
3360
3361 /* Update the addr mask bits in reg_addr to help secondary reload and the
3362 legitimate address support figure out the appropriate addressing to
3363 use. */
3364 rs6000_setup_reg_addr_masks ();
3365
3366 if (global_init_p || TARGET_DEBUG_TARGET)
3367 {
3368 if (TARGET_DEBUG_REG)
3369 rs6000_debug_reg_global ();
3370
3371 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3372 fprintf (stderr,
3373 "SImode variable mult cost = %d\n"
3374 "SImode constant mult cost = %d\n"
3375 "SImode short constant mult cost = %d\n"
3376 "DImode multipliciation cost = %d\n"
3377 "SImode division cost = %d\n"
3378 "DImode division cost = %d\n"
3379 "Simple fp operation cost = %d\n"
3380 "DFmode multiplication cost = %d\n"
3381 "SFmode division cost = %d\n"
3382 "DFmode division cost = %d\n"
3383 "cache line size = %d\n"
3384 "l1 cache size = %d\n"
3385 "l2 cache size = %d\n"
3386 "simultaneous prefetches = %d\n"
3387 "\n",
3388 rs6000_cost->mulsi,
3389 rs6000_cost->mulsi_const,
3390 rs6000_cost->mulsi_const9,
3391 rs6000_cost->muldi,
3392 rs6000_cost->divsi,
3393 rs6000_cost->divdi,
3394 rs6000_cost->fp,
3395 rs6000_cost->dmul,
3396 rs6000_cost->sdiv,
3397 rs6000_cost->ddiv,
3398 rs6000_cost->cache_line_size,
3399 rs6000_cost->l1_cache_size,
3400 rs6000_cost->l2_cache_size,
3401 rs6000_cost->simultaneous_prefetches);
3402 }
3403 }
3404
3405 #if TARGET_MACHO
3406 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3407
3408 static void
3409 darwin_rs6000_override_options (void)
3410 {
3411 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3412 off. */
3413 rs6000_altivec_abi = 1;
3414 TARGET_ALTIVEC_VRSAVE = 1;
3415 rs6000_current_abi = ABI_DARWIN;
3416
3417 if (DEFAULT_ABI == ABI_DARWIN
3418 && TARGET_64BIT)
3419 darwin_one_byte_bool = 1;
3420
3421 if (TARGET_64BIT && ! TARGET_POWERPC64)
3422 {
3423 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3424 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3425 }
3426 if (flag_mkernel)
3427 {
3428 rs6000_default_long_calls = 1;
3429 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3430 }
3431
3432 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3433 Altivec. */
3434 if (!flag_mkernel && !flag_apple_kext
3435 && TARGET_64BIT
3436 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3437 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3438
3439 /* Unless the user (not the configurer) has explicitly overridden
3440 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3441 G4 unless targeting the kernel. */
3442 if (!flag_mkernel
3443 && !flag_apple_kext
3444 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3445 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3446 && ! global_options_set.x_rs6000_cpu_index)
3447 {
3448 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3449 }
3450 }
3451 #endif
3452
3453 /* If not otherwise specified by a target, make 'long double' equivalent to
3454 'double'. */
3455
3456 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3457 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3458 #endif
3459
3460 /* Return the builtin mask of the various options used that could affect which
3461 builtins were used. In the past we used target_flags, but we've run out of
3462 bits, and some options are no longer in target_flags. */
3463
3464 HOST_WIDE_INT
3465 rs6000_builtin_mask_calculate (void)
3466 {
3467 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3468 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3469 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3470 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3471 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3472 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3473 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3474 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3475 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3476 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3477 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3478 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3479 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3480 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3481 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3482 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3483 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3484 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3485 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3486 | ((TARGET_LONG_DOUBLE_128
3487 && TARGET_HARD_FLOAT
3488 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3489 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3490 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3491 }
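/* Usage sketch (simplified and hypothetical; the real checks live in
   the builtin expansion code): each builtin records the RS6000_BTM_*
   bits it needs, and those are tested against the mask computed here,
   along the lines of

     HOST_WIDE_INT fnmask = RS6000_BTM_ALTIVEC | RS6000_BTM_VSX;
     if ((fnmask & rs6000_builtin_mask_calculate ()) != fnmask)
       error ("builtin function requires unavailable ISA features");
*/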
3492
3493 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3494 to clobber the XER[CA] bit because clobbering that bit without telling
3495 the compiler worked just fine with versions of GCC before GCC 5, and
3496 breaking a lot of older code in ways that are hard to track down is
3497 not such a great idea. */
3498
3499 static rtx_insn *
3500 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3501 vec<const char *> &/*constraints*/,
3502 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3503 {
3504 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3505 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3506 return NULL;
3507 }
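/* Example of the kind of pre-GCC 5 code this keeps working
   (illustrative): inline asm that destroys XER[CA] without declaring
   it, e.g.

     asm ("addic %0,%1,-1\n\tsubfe %0,%0,%1" : "=r" (t) : "r" (x));

   addic and subfe write and read the carry bit; the implicit clobber
   pushed above stops the compiler from assuming CA survives the
   asm.  */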
3508
3509 /* Override command line options.
3510
3511 Combine build-specific configuration information with options
3512 specified on the command line to set various state variables which
3513 influence code generation, optimization, and expansion of built-in
3514 functions. Assure that command-line configuration preferences are
3515 compatible with each other and with the build configuration; issue
3516 warnings while adjusting configuration or error messages while
3517 rejecting configuration.
3518
3519 Upon entry to this function:
3520
3521 This function is called once at the beginning of
3522 compilation, and then again at the start and end of compiling
3523 each section of code that has a different configuration, as
3524 indicated, for example, by adding the
3525
3526 __attribute__((__target__("cpu=power9")))
3527
3528 qualifier to a function definition or, for example, by bracketing
3529 code between
3530
3531 #pragma GCC target("altivec")
3532
3533 and
3534
3535 #pragma GCC reset_options
3536
3537 directives. Parameter global_init_p is true for the initial
3538 invocation, which initializes global variables, and false for all
3539 subsequent invocations.
3540
3541
3542 Various global state information is assumed to be valid. This
3543 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3544 default CPU specified at build configure time, TARGET_DEFAULT,
3545 representing the default set of option flags for the default
3546 target, and global_options_set.x_rs6000_isa_flags, representing
3547 which options were requested on the command line.
3548
3549 Upon return from this function:
3550
3551 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3552 was set by name on the command line. Additionally, if certain
3553 attributes are automatically enabled or disabled by this function
3554 in order to assure compatibility between options and
3555 configuration, the flags associated with those attributes are
3556 also set. By setting these "explicit bits", we avoid the risk
3557 that other code might accidentally overwrite these particular
3558 attributes with "default values".
3559
3560 The various bits of rs6000_isa_flags are set to indicate the
3561 target options that have been selected for the most current
3562 compilation efforts. This has the effect of also turning on the
3563 associated TARGET_XXX values since these are macros which are
3564 generally defined to test the corresponding bit of the
3565 rs6000_isa_flags variable.
3566
3567 The variable rs6000_builtin_mask is set to represent the target
3568 options for the most current compilation efforts, consistent with
3569 the current contents of rs6000_isa_flags. This variable controls
3570 expansion of built-in functions.
3571
3572 Various other global variables and fields of global structures
3573 (over 50 in all) are initialized to reflect the desired options
3574 for the most current compilation efforts. */
3575
3576 static bool
3577 rs6000_option_override_internal (bool global_init_p)
3578 {
3579 bool ret = true;
3580
3581 HOST_WIDE_INT set_masks;
3582 HOST_WIDE_INT ignore_masks;
3583 int cpu_index = -1;
3584 int tune_index;
3585 struct cl_target_option *main_target_opt
3586 = ((global_init_p || target_option_default_node == NULL)
3587 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3588
3589 /* Print defaults. */
3590 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3591 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3592
3593 /* Remember the explicit arguments. */
3594 if (global_init_p)
3595 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3596
3597 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3598 library functions, so warn about it. The flag may be useful for
3599 performance studies from time to time though, so don't disable it
3600 entirely. */
3601 if (global_options_set.x_rs6000_alignment_flags
3602 && rs6000_alignment_flags == MASK_ALIGN_POWER
3603 && DEFAULT_ABI == ABI_DARWIN
3604 && TARGET_64BIT)
3605 warning (0, "%qs is not supported for 64-bit Darwin;"
3606 " it is incompatible with the installed C and C++ libraries",
3607 "-malign-power");
3608
3609 /* Numerous experiments show that IRA-based loop pressure
3610 calculation works better for RTL loop invariant motion on targets
3611 with enough (>= 32) registers. It is an expensive optimization,
3612 so it is enabled only when optimizing for peak performance. */
3613 if (optimize >= 3 && global_init_p
3614 && !global_options_set.x_flag_ira_loop_pressure)
3615 flag_ira_loop_pressure = 1;
3616
3617 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3618 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3619 options were already specified. */
3620 if (flag_sanitize & SANITIZE_USER_ADDRESS
3621 && !global_options_set.x_flag_asynchronous_unwind_tables)
3622 flag_asynchronous_unwind_tables = 1;
3623
3624 /* Set the pointer size. */
3625 if (TARGET_64BIT)
3626 {
3627 rs6000_pmode = DImode;
3628 rs6000_pointer_size = 64;
3629 }
3630 else
3631 {
3632 rs6000_pmode = SImode;
3633 rs6000_pointer_size = 32;
3634 }
3635
3636 /* Some OSs don't support saving the high part of 64-bit registers on context
3637 switch. Other OSs don't support saving Altivec registers. On those OSs,
3638 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3639 if the user wants either, the user must explicitly specify them and we
3640 won't interfere with the user's specification. */
3641
3642 set_masks = POWERPC_MASKS;
3643 #ifdef OS_MISSING_POWERPC64
3644 if (OS_MISSING_POWERPC64)
3645 set_masks &= ~OPTION_MASK_POWERPC64;
3646 #endif
3647 #ifdef OS_MISSING_ALTIVEC
3648 if (OS_MISSING_ALTIVEC)
3649 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3650 | OTHER_VSX_VECTOR_MASKS);
3651 #endif
3652
3653 /* Don't override by the processor default if given explicitly. */
3654 set_masks &= ~rs6000_isa_flags_explicit;
3655
3656 if (global_init_p && rs6000_dejagnu_cpu_index >= 0)
3657 rs6000_cpu_index = rs6000_dejagnu_cpu_index;
3658
3659 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3660 the cpu in a target attribute or pragma, but did not specify a tuning
3661 option, use the cpu for the tuning option rather than the option specified
3662 with -mtune on the command line. Process a '--with-cpu' configuration
3663 request as an implicit -mcpu. */
3664 if (rs6000_cpu_index >= 0)
3665 cpu_index = rs6000_cpu_index;
3666 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3667 cpu_index = main_target_opt->x_rs6000_cpu_index;
3668 else if (OPTION_TARGET_CPU_DEFAULT)
3669 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3670
3671 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3672 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3673 with those from the cpu, except for options that were explicitly set. If
3674 we don't have a cpu, do not override the target bits set in
3675 TARGET_DEFAULT. */
3676 if (cpu_index >= 0)
3677 {
3678 rs6000_cpu_index = cpu_index;
3679 rs6000_isa_flags &= ~set_masks;
3680 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3681 & set_masks);
3682 }
3683 else
3684 {
3685 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3686 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3687 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3688 to using rs6000_isa_flags, we need to do the initialization here.
3689
3690 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3691 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3692 HOST_WIDE_INT flags;
3693 if (TARGET_DEFAULT)
3694 flags = TARGET_DEFAULT;
3695 else
3696 {
3697 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3698 const char *default_cpu = (!TARGET_POWERPC64
3699 ? "powerpc"
3700 : (BYTES_BIG_ENDIAN
3701 ? "powerpc64"
3702 : "powerpc64le"));
3703 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3704 flags = processor_target_table[default_cpu_index].target_enable;
3705 }
3706 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3707 }
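/* Example (illustrative): a 64-bit little-endian configuration built
   with no TARGET_DEFAULT bits takes the fallback path above, looks up
   "powerpc64le" in processor_target_table, and therefore starts from
   that entry's target_enable flags, which include at least the ISA
   2.07 features that 64-bit LE requires.  */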
3708
3709 if (rs6000_tune_index >= 0)
3710 tune_index = rs6000_tune_index;
3711 else if (cpu_index >= 0)
3712 rs6000_tune_index = tune_index = cpu_index;
3713 else
3714 {
3715 size_t i;
3716 enum processor_type tune_proc
3717 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3718
3719 tune_index = -1;
3720 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3721 if (processor_target_table[i].processor == tune_proc)
3722 {
3723 tune_index = i;
3724 break;
3725 }
3726 }
3727
3728 if (cpu_index >= 0)
3729 rs6000_cpu = processor_target_table[cpu_index].processor;
3730 else
3731 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3732
3733 gcc_assert (tune_index >= 0);
3734 rs6000_tune = processor_target_table[tune_index].processor;
3735
3736 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3737 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3738 || rs6000_cpu == PROCESSOR_PPCE5500)
3739 {
3740 if (TARGET_ALTIVEC)
3741 error ("AltiVec not supported in this target");
3742 }
3743
3744 /* If we are optimizing big endian systems for space, use the load/store
3745 multiple instructions. */
3746 if (BYTES_BIG_ENDIAN && optimize_size)
3747 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3748
3749 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3750 because the hardware doesn't support the instructions used in little
3751 endian mode, and their use causes an alignment trap. The 750 does not
3752 cause an alignment trap (except when the target is unaligned). */
3753
3754 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3755 {
3756 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3757 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3758 warning (0, "%qs is not supported on little endian systems",
3759 "-mmultiple");
3760 }
3761
3762 /* If little-endian, default to -mstrict-align on older processors.
3763 Testing for htm matches power8 and later. */
3764 if (!BYTES_BIG_ENDIAN
3765 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3766 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3767
3768 if (!rs6000_fold_gimple)
3769 fprintf (stderr,
3770 "gimple folding of rs6000 builtins has been disabled.\n");
3771
3772 /* Add some warnings for VSX. */
3773 if (TARGET_VSX)
3774 {
3775 const char *msg = NULL;
3776 if (!TARGET_HARD_FLOAT)
3777 {
3778 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3779 msg = N_("%<-mvsx%> requires hardware floating point");
3780 else
3781 {
3782 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3783 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3784 }
3785 }
3786 else if (TARGET_AVOID_XFORM > 0)
3787 msg = N_("%<-mvsx%> needs indexed addressing");
3788 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3789 & OPTION_MASK_ALTIVEC))
3790 {
3791 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3792 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3793 else
3794 msg = N_("%<-mno-altivec%> disables vsx");
3795 }
3796
3797 if (msg)
3798 {
3799 warning (0, msg);
3800 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3801 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3802 }
3803 }
3804
3805 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3806 the -mcpu setting to enable options that conflict. */
3807 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3808 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3809 | OPTION_MASK_ALTIVEC
3810 | OPTION_MASK_VSX)) != 0)
3811 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3812 | OPTION_MASK_DIRECT_MOVE)
3813 & ~rs6000_isa_flags_explicit);
3814
3815 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3816 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3817
3818 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3819 off all of the options that depend on those flags. */
3820 ignore_masks = rs6000_disable_incompatible_switches ();
3821
3822 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3823 unless the user explicitly used the -mno-<option> to disable the code. */
3824 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3825 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3826 else if (TARGET_P9_MINMAX)
3827 {
3828 if (cpu_index >= 0)
3829 {
3830 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3831 {
3832 /* Legacy behavior: allow -mcpu=power9 with certain
3833 capabilities explicitly disabled. */
3834 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3835 }
3836 else
3837 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3838 "for <xxx> less than power9", "-mcpu");
3839 }
3840 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3841 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3842 & rs6000_isa_flags_explicit))
3843 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3844 were explicitly cleared. */
3845 error ("%qs incompatible with explicitly disabled options",
3846 "-mpower9-minmax");
3847 else
3848 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3849 }
3850 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3851 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3852 else if (TARGET_VSX)
3853 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3854 else if (TARGET_POPCNTD)
3855 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3856 else if (TARGET_DFP)
3857 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3858 else if (TARGET_CMPB)
3859 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3860 else if (TARGET_FPRND)
3861 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3862 else if (TARGET_POPCNTB)
3863 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3864 else if (TARGET_ALTIVEC)
3865 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3866
3867 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3868 {
3869 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3870 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3871 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3872 }
3873
3874 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3875 {
3876 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3877 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3878 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3879 }
3880
3881 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3882 {
3883 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3884 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3885 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3886 }
3887
3888 if (TARGET_P8_VECTOR && !TARGET_VSX)
3889 {
3890 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3891 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3892 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3893 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3894 {
3895 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3896 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3897 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3898 }
3899 else
3900 {
3901 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3902 not explicit. */
3903 rs6000_isa_flags |= OPTION_MASK_VSX;
3904 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3905 }
3906 }
3907
3908 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3909 {
3910 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3911 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3912 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3913 }
3914
3915 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3916 silently turn off quad memory mode. */
3917 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3918 {
3919 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3920 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3921
3922 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3923 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3924
3925 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3926 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3927 }
3928
3929 /* Non-atomic quad memory load/store instructions are disabled for little endian, since
3930 the words are reversed, but atomic operations can still be done by
3931 swapping the words. */
3932 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3933 {
3934 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3935 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3936 "mode"));
3937
3938 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3939 }
3940
3941 /* Assume if the user asked for normal quad memory instructions, they want
3942 the atomic versions as well, unless they explicitly told us not to use quad
3943 word atomic instructions. */
3944 if (TARGET_QUAD_MEMORY
3945 && !TARGET_QUAD_MEMORY_ATOMIC
3946 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3947 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3948
3949 /* If we can shrink-wrap the TOC register save separately, then use
3950 -msave-toc-indirect unless explicitly disabled. */
3951 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3952 && flag_shrink_wrap_separate
3953 && optimize_function_for_speed_p (cfun))
3954 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3955
3956 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3957 generating power8 instructions. Power9 does not optimize power8 fusion
3958 cases. */
3959 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3960 {
3961 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3962 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3963 else
3964 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3965 }
3966
3967 /* Setting additional fusion flags turns on base fusion. */
3968 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3969 {
3970 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3971 {
3972 if (TARGET_P8_FUSION_SIGN)
3973 error ("%qs requires %qs", "-mpower8-fusion-sign",
3974 "-mpower8-fusion");
3975
3976 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3977 }
3978 else
3979 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3980 }
3981
3982 /* Power8 does not fuse sign extended loads with the addis. If we are
3983 optimizing at high levels for speed, convert a sign extended load into a
3984 zero extending load, and an explicit sign extension. */
3985 if (TARGET_P8_FUSION
3986 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3987 && optimize_function_for_speed_p (cfun)
3988 && optimize >= 3)
3989 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
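/* Assembly sketch of the transformation enabled here (illustrative;
   register numbers and the symbol are arbitrary): power8 cannot fuse
   addis with a sign-extending load, so

     addis 9,2,x@toc@ha          addis 9,2,x@toc@ha
     lha 3,x@toc@l(9)      -->   lhz 3,x@toc@l(9)
                                 extsh 3,3

   The zero-extending lhz can fuse with the addis, and the separate
   extsh recovers the sign extension.  */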
3990
3991 /* ISA 3.0 vector instructions include ISA 2.07. */
3992 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3993 {
3994 /* We prefer to not mention undocumented options in
3995 error messages. However, if users have managed to select
3996 power9-vector without selecting power8-vector, they
3997 already know about undocumented flags. */
3998 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
3999 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4000 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4001 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4002 {
4003 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4004 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4005 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4006 }
4007 else
4008 {
4009 /* OPTION_MASK_P9_VECTOR is explicit and
4010 OPTION_MASK_P8_VECTOR is not explicit. */
4011 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4012 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4013 }
4014 }
4015
4016 /* Set -mallow-movmisalign explicitly on if we have full ISA 2.07
4017 support. If we only have ISA 2.06 support, and the user did not specify
4018 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4019 but we don't enable the full vectorization support. */
4020 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4021 TARGET_ALLOW_MOVMISALIGN = 1;
4022
4023 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4024 {
4025 if (TARGET_ALLOW_MOVMISALIGN > 0
4026 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4027 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4028
4029 TARGET_ALLOW_MOVMISALIGN = 0;
4030 }
4031
4032 /* Determine when unaligned vector accesses are permitted, and when
4033 they are preferred over masked Altivec loads. Note that if
4034 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4035 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4036 not true. */
4037 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4038 {
4039 if (!TARGET_VSX)
4040 {
4041 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4042 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4043
4044 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4045 }
4046
4047 else if (!TARGET_ALLOW_MOVMISALIGN)
4048 {
4049 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4050 error ("%qs requires %qs", "-mefficient-unaligned-vsx",
4051 "-mallow-movmisalign");
4052
4053 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4054 }
4055 }
4056
4057 /* Use long double size to select the appropriate long double. We use
4058 TYPE_PRECISION to differentiate the 3 different long double types. We map
4059 128 into the precision used for TFmode. */
4060 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4061 ? 64
4062 : FLOAT_PRECISION_TFmode);
4063
4064 /* Set long double size before the IEEE 128-bit tests. */
4065 if (!global_options_set.x_rs6000_long_double_type_size)
4066 {
4067 if (main_target_opt != NULL
4068 && (main_target_opt->x_rs6000_long_double_type_size
4069 != default_long_double_size))
4070 error ("target attribute or pragma changes %<long double%> size");
4071 else
4072 rs6000_long_double_type_size = default_long_double_size;
4073 }
4074 else if (rs6000_long_double_type_size == 128)
4075 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4076 else if (global_options_set.x_rs6000_ieeequad)
4077 {
4078 if (global_options.x_rs6000_ieeequad)
4079 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4080 else
4081 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
4082 }
4083
4084 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4085 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4086 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4087 those systems will not pick up this default. Warn if the user changes the
4088 default unless -Wno-psabi. */
4089 if (!global_options_set.x_rs6000_ieeequad)
4090 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4091
4092 else
4093 {
4094 if (global_options.x_rs6000_ieeequad
4095 && (!TARGET_POPCNTD || !TARGET_VSX))
4096 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4097
4098 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4099 {
4100 static bool warned_change_long_double;
4101 if (!warned_change_long_double)
4102 {
4103 warned_change_long_double = true;
4104 if (TARGET_IEEEQUAD)
4105 warning (OPT_Wpsabi, "Using IEEE extended precision "
4106 "%<long double%>");
4107 else
4108 warning (OPT_Wpsabi, "Using IBM extended precision "
4109 "%<long double%>");
4110 }
4111 }
4112 }
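/* Examples of the diagnostics above (hypothetical command lines, on a
   target whose default is the IBM double-double format):

     gcc -mlong-double-64 -mabi=ieeelongdouble   -> error, since
        -mabi=ieeelongdouble requires -mlong-double-128
     gcc -mlong-double-128 -mabi=ieeelongdouble  -> compiles, but warns
        once with -Wpsabi that IEEE extended precision long double is
        in use.  */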
4113
4114 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4115 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4116 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4117 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4118 the keyword and the type. */
4119 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4120
4121 /* IEEE 128-bit floating point requires VSX support. */
4122 if (TARGET_FLOAT128_KEYWORD)
4123 {
4124 if (!TARGET_VSX)
4125 {
4126 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4127 error ("%qs requires VSX support", "%<-mfloat128%>");
4128
4129 TARGET_FLOAT128_TYPE = 0;
4130 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4131 | OPTION_MASK_FLOAT128_HW);
4132 }
4133 else if (!TARGET_FLOAT128_TYPE)
4134 {
4135 TARGET_FLOAT128_TYPE = 1;
4136 warning (0, "the %<-mfloat128%> option may not be fully supported");
4137 }
4138 }
4139
4140 /* Enable the __float128 keyword under Linux by default. */
4141 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4142 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4143 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4144
4145 /* If we are supporting the float128 type and have full ISA 3.0 support,
4146 enable -mfloat128-hardware by default. However, don't enable the
4147 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4148 because sometimes the compiler wants to put things in an integer
4149 container, and if we don't have __int128 support, it is impossible. */
4150 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4151 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4152 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4153 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4154
4155 if (TARGET_FLOAT128_HW
4156 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4157 {
4158 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4159 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4160
4161 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4162 }
4163
4164 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4165 {
4166 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4167 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4168
4169 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4170 }
4171
4172 /* -mprefixed-addr (and hence -mpcrel) requires -mcpu=future. */
4173 if (TARGET_PREFIXED_ADDR && !TARGET_FUTURE)
4174 {
4175 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4176 error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
4177 else if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED_ADDR) != 0)
4178 error ("%qs requires %qs", "-mprefixed-addr", "-mcpu=future");
4179
4180 rs6000_isa_flags &= ~(OPTION_MASK_PCREL | OPTION_MASK_PREFIXED_ADDR);
4181 }
4182
4183 /* -mpcrel requires prefixed load/store addressing. */
4184 if (TARGET_PCREL && !TARGET_PREFIXED_ADDR)
4185 {
4186 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4187 error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
4188
4189 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4190 }
4191
4192 /* Print the options after updating the defaults. */
4193 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4194 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4195
4196 /* E500mc does "better" if we inline more aggressively. Respect the
4197 user's opinion, though. */
4198 if (rs6000_block_move_inline_limit == 0
4199 && (rs6000_tune == PROCESSOR_PPCE500MC
4200 || rs6000_tune == PROCESSOR_PPCE500MC64
4201 || rs6000_tune == PROCESSOR_PPCE5500
4202 || rs6000_tune == PROCESSOR_PPCE6500))
4203 rs6000_block_move_inline_limit = 128;
4204
4205 /* store_one_arg depends on expand_block_move to handle at least the
4206 size of reg_parm_stack_space. */
4207 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4208 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4209
4210 if (global_init_p)
4211 {
4212 /* If the appropriate debug option is enabled, replace the target hooks
4213 with debug versions that call the real version and then print
4214 debugging information. */
4215 if (TARGET_DEBUG_COST)
4216 {
4217 targetm.rtx_costs = rs6000_debug_rtx_costs;
4218 targetm.address_cost = rs6000_debug_address_cost;
4219 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4220 }
4221
4222 if (TARGET_DEBUG_ADDR)
4223 {
4224 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4225 targetm.legitimize_address = rs6000_debug_legitimize_address;
4226 rs6000_secondary_reload_class_ptr
4227 = rs6000_debug_secondary_reload_class;
4228 targetm.secondary_memory_needed
4229 = rs6000_debug_secondary_memory_needed;
4230 targetm.can_change_mode_class
4231 = rs6000_debug_can_change_mode_class;
4232 rs6000_preferred_reload_class_ptr
4233 = rs6000_debug_preferred_reload_class;
4234 rs6000_mode_dependent_address_ptr
4235 = rs6000_debug_mode_dependent_address;
4236 }
4237
4238 if (rs6000_veclibabi_name)
4239 {
4240 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4241 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4242 else
4243 {
4244 error ("unknown vectorization library ABI type (%qs) for "
4245 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4246 ret = false;
4247 }
4248 }
4249 }
4250
4251 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4252 target attribute or pragma which automatically enables both options,
4253 unless the altivec ABI was set. This is set by default for 64-bit, but
4254 not for 32-bit. */
4255 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4256 {
4257 TARGET_FLOAT128_TYPE = 0;
4258 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4259 | OPTION_MASK_FLOAT128_KEYWORD)
4260 & ~rs6000_isa_flags_explicit);
4261 }
4262
4263 /* Enable Altivec ABI for AIX -maltivec. */
4264 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4265 {
4266 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4267 error ("target attribute or pragma changes AltiVec ABI");
4268 else
4269 rs6000_altivec_abi = 1;
4270 }
4271
4272 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4273 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4274 be explicitly overridden in either case. */
4275 if (TARGET_ELF)
4276 {
4277 if (!global_options_set.x_rs6000_altivec_abi
4278 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4279 {
4280 if (main_target_opt != NULL
4281 && !main_target_opt->x_rs6000_altivec_abi)
4282 error ("target attribute or pragma changes AltiVec ABI");
4283 else
4284 rs6000_altivec_abi = 1;
4285 }
4286 }
4287
4288 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4289 So far, the only darwin64 targets are also MACH-O. */
4290 if (TARGET_MACHO
4291 && DEFAULT_ABI == ABI_DARWIN
4292 && TARGET_64BIT)
4293 {
4294 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4295 error ("target attribute or pragma changes darwin64 ABI");
4296 else
4297 {
4298 rs6000_darwin64_abi = 1;
4299 /* Default to natural alignment, for better performance. */
4300 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4301 }
4302 }
4303
4304 /* Place FP constants in the constant pool instead of TOC
4305 if section anchors enabled. */
4306 if (flag_section_anchors
4307 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4308 TARGET_NO_FP_IN_TOC = 1;
4309
4310 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4311 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4312
4313 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4314 SUBTARGET_OVERRIDE_OPTIONS;
4315 #endif
4316 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4317 SUBSUBTARGET_OVERRIDE_OPTIONS;
4318 #endif
4319 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4320 SUB3TARGET_OVERRIDE_OPTIONS;
4321 #endif
4322
4323 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4324 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4325
4326 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4327 && rs6000_tune != PROCESSOR_POWER5
4328 && rs6000_tune != PROCESSOR_POWER6
4329 && rs6000_tune != PROCESSOR_POWER7
4330 && rs6000_tune != PROCESSOR_POWER8
4331 && rs6000_tune != PROCESSOR_POWER9
4332 && rs6000_tune != PROCESSOR_FUTURE
4333 && rs6000_tune != PROCESSOR_PPCA2
4334 && rs6000_tune != PROCESSOR_CELL
4335 && rs6000_tune != PROCESSOR_PPC476);
4336 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4337 || rs6000_tune == PROCESSOR_POWER5
4338 || rs6000_tune == PROCESSOR_POWER7
4339 || rs6000_tune == PROCESSOR_POWER8);
4340 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4341 || rs6000_tune == PROCESSOR_POWER5
4342 || rs6000_tune == PROCESSOR_POWER6
4343 || rs6000_tune == PROCESSOR_POWER7
4344 || rs6000_tune == PROCESSOR_POWER8
4345 || rs6000_tune == PROCESSOR_POWER9
4346 || rs6000_tune == PROCESSOR_FUTURE
4347 || rs6000_tune == PROCESSOR_PPCE500MC
4348 || rs6000_tune == PROCESSOR_PPCE500MC64
4349 || rs6000_tune == PROCESSOR_PPCE5500
4350 || rs6000_tune == PROCESSOR_PPCE6500);
4351
4352 /* Allow debug switches to override the above settings. These are set to -1
4353 in rs6000.opt to indicate the user hasn't directly set the switch. */
4354 if (TARGET_ALWAYS_HINT >= 0)
4355 rs6000_always_hint = TARGET_ALWAYS_HINT;
4356
4357 if (TARGET_SCHED_GROUPS >= 0)
4358 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4359
4360 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4361 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4362
4363 rs6000_sched_restricted_insns_priority
4364 = (rs6000_sched_groups ? 1 : 0);
4365
4366 /* Handle -msched-costly-dep option. */
4367 rs6000_sched_costly_dep
4368 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4369
4370 if (rs6000_sched_costly_dep_str)
4371 {
4372 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4373 rs6000_sched_costly_dep = no_dep_costly;
4374 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4375 rs6000_sched_costly_dep = all_deps_costly;
4376 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4377 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4378 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4379 rs6000_sched_costly_dep = store_to_load_dep_costly;
4380 else
4381 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4382 atoi (rs6000_sched_costly_dep_str));
4383 }
4384
4385 /* Handle -minsert-sched-nops option. */
4386 rs6000_sched_insert_nops
4387 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4388
4389 if (rs6000_sched_insert_nops_str)
4390 {
4391 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4392 rs6000_sched_insert_nops = sched_finish_none;
4393 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4394 rs6000_sched_insert_nops = sched_finish_pad_groups;
4395 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4396 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4397 else
4398 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4399 atoi (rs6000_sched_insert_nops_str));
4400 }
4401
4402 /* Handle stack protector. */
4403 if (!global_options_set.x_rs6000_stack_protector_guard)
4404 #ifdef TARGET_THREAD_SSP_OFFSET
4405 rs6000_stack_protector_guard = SSP_TLS;
4406 #else
4407 rs6000_stack_protector_guard = SSP_GLOBAL;
4408 #endif
4409
4410 #ifdef TARGET_THREAD_SSP_OFFSET
4411 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4412 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4413 #endif
4414
4415 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4416 {
4417 char *endp;
4418 const char *str = rs6000_stack_protector_guard_offset_str;
4419
4420 errno = 0;
4421 long offset = strtol (str, &endp, 0);
4422 if (!*str || *endp || errno)
4423 error ("%qs is not a valid number in %qs", str,
4424 "-mstack-protector-guard-offset=");
4425
4426 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4427 || (TARGET_64BIT && (offset & 3)))
4428 error ("%qs is not a valid offset in %qs", str,
4429 "-mstack-protector-guard-offset=");
4430
4431 rs6000_stack_protector_guard_offset = offset;
4432 }
4433
4434 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4435 {
4436 const char *str = rs6000_stack_protector_guard_reg_str;
4437 int reg = decode_reg_name (str);
4438
4439 if (!IN_RANGE (reg, 1, 31))
4440 error ("%qs is not a valid base register in %qs", str,
4441 "-mstack-protector-guard-reg=");
4442
4443 rs6000_stack_protector_guard_reg = reg;
4444 }
4445
4446 if (rs6000_stack_protector_guard == SSP_TLS
4447 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4448 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
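/* Usage sketch (hypothetical command line): on a 64-bit Linux target,

     gcc -fstack-protector-strong -mstack-protector-guard=tls \
         -mstack-protector-guard-reg=r13 \
         -mstack-protector-guard-offset=0x28

   passes the checks above (r13 decodes to register 13, and 0x28 is in
   range and word aligned), so the canary is loaded from 0x28(r13)
   rather than from the global __stack_chk_guard.  */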
4449
4450 if (global_init_p)
4451 {
4452 #ifdef TARGET_REGNAMES
4453 /* If the user desires alternate register names, copy in the
4454 alternate names now. */
4455 if (TARGET_REGNAMES)
4456 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4457 #endif
4458
4459 /* Set aix_struct_return last, after the ABI is determined.
4460 If -maix-struct-return or -msvr4-struct-return was explicitly
4461 used, don't override with the ABI default. */
4462 if (!global_options_set.x_aix_struct_return)
4463 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4464
4465 #if 0
4466 /* IBM XL compiler defaults to unsigned bitfields. */
4467 if (TARGET_XL_COMPAT)
4468 flag_signed_bitfields = 0;
4469 #endif
4470
4471 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4472 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4473
4474 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4475
4476 /* We can only guarantee the availability of DI pseudo-ops when
4477 assembling for 64-bit targets. */
4478 if (!TARGET_64BIT)
4479 {
4480 targetm.asm_out.aligned_op.di = NULL;
4481 targetm.asm_out.unaligned_op.di = NULL;
4482 }
4483
4484
4485 /* Set branch target alignment, if not optimizing for size. */
4486 if (!optimize_size)
4487 {
4488 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4489 aligned 8-byte to avoid misprediction by the branch predictor. */
4490 if (rs6000_tune == PROCESSOR_TITAN
4491 || rs6000_tune == PROCESSOR_CELL)
4492 {
4493 if (flag_align_functions && !str_align_functions)
4494 str_align_functions = "8";
4495 if (flag_align_jumps && !str_align_jumps)
4496 str_align_jumps = "8";
4497 if (flag_align_loops && !str_align_loops)
4498 str_align_loops = "8";
4499 }
4500 if (rs6000_align_branch_targets)
4501 {
4502 if (flag_align_functions && !str_align_functions)
4503 str_align_functions = "16";
4504 if (flag_align_jumps && !str_align_jumps)
4505 str_align_jumps = "16";
4506 if (flag_align_loops && !str_align_loops)
4507 {
4508 can_override_loop_align = 1;
4509 str_align_loops = "16";
4510 }
4511 }
4512
4513 if (flag_align_jumps && !str_align_jumps)
4514 str_align_jumps = "16";
4515 if (flag_align_loops && !str_align_loops)
4516 str_align_loops = "16";
4517 }
4518
4519 /* Arrange to save and restore machine status around nested functions. */
4520 init_machine_status = rs6000_init_machine_status;
4521
4522 /* We should always be splitting complex arguments, but we can't break
4523 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4524 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4525 targetm.calls.split_complex_arg = NULL;
4526
4527 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4528 if (DEFAULT_ABI == ABI_AIX)
4529 targetm.calls.custom_function_descriptors = 0;
4530 }
4531
4532 /* Initialize rs6000_cost with the appropriate target costs. */
4533 if (optimize_size)
4534 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4535 else
4536 switch (rs6000_tune)
4537 {
4538 case PROCESSOR_RS64A:
4539 rs6000_cost = &rs64a_cost;
4540 break;
4541
4542 case PROCESSOR_MPCCORE:
4543 rs6000_cost = &mpccore_cost;
4544 break;
4545
4546 case PROCESSOR_PPC403:
4547 rs6000_cost = &ppc403_cost;
4548 break;
4549
4550 case PROCESSOR_PPC405:
4551 rs6000_cost = &ppc405_cost;
4552 break;
4553
4554 case PROCESSOR_PPC440:
4555 rs6000_cost = &ppc440_cost;
4556 break;
4557
4558 case PROCESSOR_PPC476:
4559 rs6000_cost = &ppc476_cost;
4560 break;
4561
4562 case PROCESSOR_PPC601:
4563 rs6000_cost = &ppc601_cost;
4564 break;
4565
4566 case PROCESSOR_PPC603:
4567 rs6000_cost = &ppc603_cost;
4568 break;
4569
4570 case PROCESSOR_PPC604:
4571 rs6000_cost = &ppc604_cost;
4572 break;
4573
4574 case PROCESSOR_PPC604e:
4575 rs6000_cost = &ppc604e_cost;
4576 break;
4577
4578 case PROCESSOR_PPC620:
4579 rs6000_cost = &ppc620_cost;
4580 break;
4581
4582 case PROCESSOR_PPC630:
4583 rs6000_cost = &ppc630_cost;
4584 break;
4585
4586 case PROCESSOR_CELL:
4587 rs6000_cost = &ppccell_cost;
4588 break;
4589
4590 case PROCESSOR_PPC750:
4591 case PROCESSOR_PPC7400:
4592 rs6000_cost = &ppc750_cost;
4593 break;
4594
4595 case PROCESSOR_PPC7450:
4596 rs6000_cost = &ppc7450_cost;
4597 break;
4598
4599 case PROCESSOR_PPC8540:
4600 case PROCESSOR_PPC8548:
4601 rs6000_cost = &ppc8540_cost;
4602 break;
4603
4604 case PROCESSOR_PPCE300C2:
4605 case PROCESSOR_PPCE300C3:
4606 rs6000_cost = &ppce300c2c3_cost;
4607 break;
4608
4609 case PROCESSOR_PPCE500MC:
4610 rs6000_cost = &ppce500mc_cost;
4611 break;
4612
4613 case PROCESSOR_PPCE500MC64:
4614 rs6000_cost = &ppce500mc64_cost;
4615 break;
4616
4617 case PROCESSOR_PPCE5500:
4618 rs6000_cost = &ppce5500_cost;
4619 break;
4620
4621 case PROCESSOR_PPCE6500:
4622 rs6000_cost = &ppce6500_cost;
4623 break;
4624
4625 case PROCESSOR_TITAN:
4626 rs6000_cost = &titan_cost;
4627 break;
4628
4629 case PROCESSOR_POWER4:
4630 case PROCESSOR_POWER5:
4631 rs6000_cost = &power4_cost;
4632 break;
4633
4634 case PROCESSOR_POWER6:
4635 rs6000_cost = &power6_cost;
4636 break;
4637
4638 case PROCESSOR_POWER7:
4639 rs6000_cost = &power7_cost;
4640 break;
4641
4642 case PROCESSOR_POWER8:
4643 rs6000_cost = &power8_cost;
4644 break;
4645
4646 case PROCESSOR_POWER9:
4647 case PROCESSOR_FUTURE:
4648 rs6000_cost = &power9_cost;
4649 break;
4650
4651 case PROCESSOR_PPCA2:
4652 rs6000_cost = &ppca2_cost;
4653 break;
4654
4655 default:
4656 gcc_unreachable ();
4657 }
4658
4659 if (global_init_p)
4660 {
4661 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4662 rs6000_cost->simultaneous_prefetches,
4663 global_options.x_param_values,
4664 global_options_set.x_param_values);
4665 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4666 global_options.x_param_values,
4667 global_options_set.x_param_values);
4668 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4669 rs6000_cost->cache_line_size,
4670 global_options.x_param_values,
4671 global_options_set.x_param_values);
4672 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4673 global_options.x_param_values,
4674 global_options_set.x_param_values);
4675
4676 /* Increase loop peeling limits based on performance analysis. */
4677 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4678 global_options.x_param_values,
4679 global_options_set.x_param_values);
4680 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4681 global_options.x_param_values,
4682 global_options_set.x_param_values);
4683
4684 /* Use the 'model' -fsched-pressure algorithm by default. */
4685 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
4686 SCHED_PRESSURE_MODEL,
4687 global_options.x_param_values,
4688 global_options_set.x_param_values);
4689
4690 /* If using typedef char *va_list, signal that
4691 __builtin_va_start (&ap, 0) can be optimized to
4692 ap = __builtin_next_arg (0). */
4693 if (DEFAULT_ABI != ABI_V4)
4694 targetm.expand_builtin_va_start = NULL;
4695 }
4696
4697 /* If not explicitly specified via option, decide whether to generate indexed
4698 load/store instructions. A value of -1 indicates that the
4699 initial value of this variable has not been overwritten. During
4700 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4701 if (TARGET_AVOID_XFORM == -1)
4702 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4703 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4704 need indexed accesses and the type used is the scalar type of the element
4705 being loaded or stored. */
4706 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4707 && !TARGET_ALTIVEC);
4708
4709 /* Set the -mrecip options. */
4710 if (rs6000_recip_name)
4711 {
4712 char *p = ASTRDUP (rs6000_recip_name);
4713 char *q;
4714 unsigned int mask, i;
4715 bool invert;
4716
4717 while ((q = strtok (p, ",")) != NULL)
4718 {
4719 p = NULL;
4720 if (*q == '!')
4721 {
4722 invert = true;
4723 q++;
4724 }
4725 else
4726 invert = false;
4727
4728 if (!strcmp (q, "default"))
4729 mask = ((TARGET_RECIP_PRECISION)
4730 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4731 else
4732 {
4733 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4734 if (!strcmp (q, recip_options[i].string))
4735 {
4736 mask = recip_options[i].mask;
4737 break;
4738 }
4739
4740 if (i == ARRAY_SIZE (recip_options))
4741 {
4742 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4743 invert = false;
4744 mask = 0;
4745 ret = false;
4746 }
4747 }
4748
4749 if (invert)
4750 rs6000_recip_control &= ~mask;
4751 else
4752 rs6000_recip_control |= mask;
4753 }
4754 }
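/* Parsing example (illustrative): -mrecip=rsqrtd,!divf is split on
   commas by the loop above; "rsqrtd" sets the double-precision rsqrt
   bit in rs6000_recip_control, while the '!' prefix on "divf" clears
   the single-precision divide bit, assuming both names appear in
   recip_options.  */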
4755
4756 /* Set the builtin mask of the various options used that could affect which
4757 builtins were used. In the past we used target_flags, but we've run out
4758 of bits, and some options are no longer in target_flags. */
4759 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4760 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4761 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4762 rs6000_builtin_mask);
4763
4764 /* Initialize all of the registers. */
4765 rs6000_init_hard_regno_mode_ok (global_init_p);
4766
4767 /* Save the initial options in case the user uses function specific options. */
4768 if (global_init_p)
4769 target_option_default_node = target_option_current_node
4770 = build_target_option_node (&global_options);
4771
4772 /* If not explicitly specified via option, decide whether to generate the
4773 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4774 if (TARGET_LINK_STACK == -1)
4775 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4776
4777 /* Deprecate use of -mno-speculate-indirect-jumps. */
4778 if (!rs6000_speculate_indirect_jumps)
4779 warning (0, "%qs is deprecated and not recommended in any circumstances",
4780 "-mno-speculate-indirect-jumps");
4781
4782 return ret;
4783 }
4784
4785 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4786 define the target cpu type. */
4787
4788 static void
4789 rs6000_option_override (void)
4790 {
4791 (void) rs6000_option_override_internal (true);
4792 }
4793
4794 \f
4795 /* Implement targetm.vectorize.builtin_mask_for_load. */
4796 static tree
4797 rs6000_builtin_mask_for_load (void)
4798 {
4799 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4800 if ((TARGET_ALTIVEC && !TARGET_VSX)
4801 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4802 return altivec_builtin_mask_for_load;
4803 else
4804 return 0;
4805 }
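/* Illustrative expansion (a sketch of the classic AltiVec realignment
   idiom the mask enables; the vectorizer emits the equivalent
   internally):

     mask = vec_lvsl (0, addr);
     lo   = vec_ld (0, addr);
     hi   = vec_ld (16, addr);
     val  = vec_perm (lo, hi, mask);

   On targets with cheap unaligned VSX loads this is a loss, which is
   why the hook returns 0 for them.  */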
4806
4807 /* Implement LOOP_ALIGN. */
4808 align_flags
4809 rs6000_loop_align (rtx label)
4810 {
4811 basic_block bb;
4812 int ninsns;
4813
4814 /* Don't override loop alignment if -falign-loops was specified. */
4815 if (!can_override_loop_align)
4816 return align_loops;
4817
4818 bb = BLOCK_FOR_INSN (label);
4819 ninsns = num_loop_insns (bb->loop_father);
4820
4821 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4822 if (ninsns > 4 && ninsns <= 8
4823 && (rs6000_tune == PROCESSOR_POWER4
4824 || rs6000_tune == PROCESSOR_POWER5
4825 || rs6000_tune == PROCESSOR_POWER6
4826 || rs6000_tune == PROCESSOR_POWER7
4827 || rs6000_tune == PROCESSOR_POWER8))
4828 return align_flags (5);
4829 else
4830 return align_loops;
4831 }
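/* Example (illustrative): when tuning for power8, a loop whose body
   contains 6 instructions satisfies the test above and is given
   align_flags (5), i.e. 2**5 == 32-byte alignment, so it fits in a
   single icache sector; a 20-instruction loop keeps the normal
   -falign-loops setting.  */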
4832
4833 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4834 after applying N iterations. This routine does not determine
4835 how many iterations are required to reach the desired alignment. */
4836
4837 static bool
4838 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4839 {
4840 if (is_packed)
4841 return false;
4842
4843 if (TARGET_32BIT)
4844 {
4845 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4846 return true;
4847
4848 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4849 return true;
4850
4851 return false;
4852 }
4853 else
4854 {
4855 if (TARGET_MACHO)
4856 return false;
4857
4858 /* Assume that all other types are naturally aligned. CHECKME! */
4859 return true;
4860 }
4861 }
4862
4863 /* Return true if the vector misalignment factor is supported by the
4864 target. */
4865 static bool
4866 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4867 const_tree type,
4868 int misalignment,
4869 bool is_packed)
4870 {
4871 if (TARGET_VSX)
4872 {
4873 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4874 return true;
4875
4876 /* Return false if the movmisalign pattern is not supported for this mode. */
4877 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4878 return false;
4879
4880 if (misalignment == -1)
4881 {
4882 /* Misalignment factor is unknown at compile time but we know
4883 it's word aligned. */
4884 if (rs6000_vector_alignment_reachable (type, is_packed))
4885 {
4886 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4887
4888 if (element_size == 64 || element_size == 32)
4889 return true;
4890 }
4891
4892 return false;
4893 }
4894
4895 /* VSX supports word-aligned vectors.  */
4896 if (misalignment % 4 == 0)
4897 return true;
4898 }
4899 return false;
4900 }
4901
4902 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4903 static int
4904 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4905 tree vectype, int misalign)
4906 {
4907 unsigned elements;
4908 tree elem_type;
4909
4910 switch (type_of_cost)
4911 {
4912 case scalar_stmt:
4913 case scalar_load:
4914 case scalar_store:
4915 case vector_stmt:
4916 case vector_load:
4917 case vector_store:
4918 case vec_to_scalar:
4919 case scalar_to_vec:
4920 case cond_branch_not_taken:
4921 return 1;
4922
4923 case vec_perm:
4924 if (TARGET_VSX)
4925 return 3;
4926 else
4927 return 1;
4928
4929 case vec_promote_demote:
4930 if (TARGET_VSX)
4931 return 4;
4932 else
4933 return 1;
4934
4935 case cond_branch_taken:
4936 return 3;
4937
4938 case unaligned_load:
4939 case vector_gather_load:
4940 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4941 return 1;
4942
4943 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4944 {
4945 elements = TYPE_VECTOR_SUBPARTS (vectype);
4946 if (elements == 2)
4947 /* Double word aligned. */
4948 return 2;
4949
4950 if (elements == 4)
4951 {
4952 switch (misalign)
4953 {
4954 case 8:
4955 /* Double word aligned. */
4956 return 2;
4957
4958 case -1:
4959 /* Unknown misalignment. */
4960 case 4:
4961 case 12:
4962 /* Word aligned. */
4963 return 22;
4964
4965 default:
4966 gcc_unreachable ();
4967 }
4968 }
4969 }
4970
4971 if (TARGET_ALTIVEC)
4972 /* Misaligned loads are not supported. */
4973 gcc_unreachable ();
4974
4975 return 2;
4976
4977 case unaligned_store:
4978 case vector_scatter_store:
4979 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4980 return 1;
4981
4982 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4983 {
4984 elements = TYPE_VECTOR_SUBPARTS (vectype);
4985 if (elements == 2)
4986 /* Double word aligned. */
4987 return 2;
4988
4989 if (elements == 4)
4990 {
4991 switch (misalign)
4992 {
4993 case 8:
4994 /* Double word aligned. */
4995 return 2;
4996
4997 case -1:
4998 /* Unknown misalignment. */
4999 case 4:
5000 case 12:
5001 /* Word aligned. */
5002 return 23;
5003
5004 default:
5005 gcc_unreachable ();
5006 }
5007 }
5008 }
5009
5010 if (TARGET_ALTIVEC)
5011 /* Misaligned stores are not supported. */
5012 gcc_unreachable ();
5013
5014 return 2;
5015
5016 case vec_construct:
5017 /* This is a rough approximation assuming non-constant elements
5018 constructed into a vector via element insertion. FIXME:
5019 vec_construct is not granular enough for uniformly good
5020 decisions. If the initialization is a splat, this is
5021 cheaper than we estimate. Improve this someday. */
5022 elem_type = TREE_TYPE (vectype);
5023 /* 32-bit floats loaded into registers are stored as double
5024 precision, so we need 2 permutes, 2 converts, and 1 merge
5025 to construct a vector of short floats from them.  */
5026 if (SCALAR_FLOAT_TYPE_P (elem_type)
5027 && TYPE_PRECISION (elem_type) == 32)
5028 return 5;
5029 /* On POWER9, integer vector types are built up in GPRs and then
5030 use a direct move (2 cycles). For POWER8 this is even worse,
5031 as we need two direct moves and a merge, and the direct moves
5032 are five cycles. */
5033 else if (INTEGRAL_TYPE_P (elem_type))
5034 {
5035 if (TARGET_P9_VECTOR)
5036 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5037 else
5038 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5039 }
5040 else
5041 /* V2DFmode doesn't need a direct move. */
5042 return 2;
5043
5044 default:
5045 gcc_unreachable ();
5046 }
5047 }
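/* As an illustration of the cost table above: an unaligned V4SFmode load
   whose misalignment is known to be 4 bytes costs 22 units when we only
   have VSX with movmisalign (reflecting the load/permute realignment
   sequence), but only 1 unit on machines with efficient unaligned VSX
   accesses.  These are abstract cost units, not cycle counts.  */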
5048
5049 /* Implement targetm.vectorize.preferred_simd_mode. */
5050
5051 static machine_mode
5052 rs6000_preferred_simd_mode (scalar_mode mode)
5053 {
5054 if (TARGET_VSX)
5055 switch (mode)
5056 {
5057 case E_DFmode:
5058 return V2DFmode;
5059 default:;
5060 }
5061 if (TARGET_ALTIVEC || TARGET_VSX)
5062 switch (mode)
5063 {
5064 case E_SFmode:
5065 return V4SFmode;
5066 case E_TImode:
5067 return V1TImode;
5068 case E_DImode:
5069 return V2DImode;
5070 case E_SImode:
5071 return V4SImode;
5072 case E_HImode:
5073 return V8HImode;
5074 case E_QImode:
5075 return V16QImode;
5076 default:;
5077 }
5078 return word_mode;
5079 }
5080
5081 typedef struct _rs6000_cost_data
5082 {
5083 struct loop *loop_info;
5084 unsigned cost[3];
5085 } rs6000_cost_data;
5086
5087 /* Test for likely overcommitment of vector hardware resources. If a
5088 loop iteration is relatively large, and too large a percentage of
5089 instructions in the loop are vectorized, the cost model may not
5090 adequately reflect delays from unavailable vector resources.
5091 Penalize the loop body cost for this case. */
5092
5093 static void
5094 rs6000_density_test (rs6000_cost_data *data)
5095 {
5096 const int DENSITY_PCT_THRESHOLD = 85;
5097 const int DENSITY_SIZE_THRESHOLD = 70;
5098 const int DENSITY_PENALTY = 10;
5099 struct loop *loop = data->loop_info;
5100 basic_block *bbs = get_loop_body (loop);
5101 int nbbs = loop->num_nodes;
5102 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
5103 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5104 int i, density_pct;
5105
5106 for (i = 0; i < nbbs; i++)
5107 {
5108 basic_block bb = bbs[i];
5109 gimple_stmt_iterator gsi;
5110
5111 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5112 {
5113 gimple *stmt = gsi_stmt (gsi);
5114 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5115
5116 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5117 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5118 not_vec_cost++;
5119 }
5120 }
5121
5122 free (bbs);
5123 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5124
5125 if (density_pct > DENSITY_PCT_THRESHOLD
5126 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5127 {
5128 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5129 if (dump_enabled_p ())
5130 dump_printf_loc (MSG_NOTE, vect_location,
5131 "density %d%%, cost %d exceeds threshold, penalizing "
5132 "loop body cost by %d%%", density_pct,
5133 vec_cost + not_vec_cost, DENSITY_PENALTY);
5134 }
5135 }
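/* As a worked example of the density test: a loop body with vec_cost == 90
   and not_vec_cost == 10 has density_pct == 90 and size 100; both
   thresholds are exceeded, so the body cost is scaled to
   90 * (100 + 10) / 100 == 99.  */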
5136
5137 /* Implement targetm.vectorize.init_cost. */
5138
5139 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5140 instruction is needed by the vectorization. */
5141 static bool rs6000_vect_nonmem;
5142
5143 static void *
5144 rs6000_init_cost (struct loop *loop_info)
5145 {
5146 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5147 data->loop_info = loop_info;
5148 data->cost[vect_prologue] = 0;
5149 data->cost[vect_body] = 0;
5150 data->cost[vect_epilogue] = 0;
5151 rs6000_vect_nonmem = false;
5152 return data;
5153 }
5154
5155 /* Implement targetm.vectorize.add_stmt_cost. */
5156
5157 static unsigned
5158 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5159 struct _stmt_vec_info *stmt_info, int misalign,
5160 enum vect_cost_model_location where)
5161 {
5162 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5163 unsigned retval = 0;
5164
5165 if (flag_vect_cost_model)
5166 {
5167 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5168 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5169 misalign);
5170 /* Statements in an inner loop relative to the loop being
5171 vectorized are weighted more heavily. The value here is
5172 arbitrary and could potentially be improved with analysis. */
5173 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5174 count *= 50; /* FIXME. */
5175
5176 retval = (unsigned) (count * stmt_cost);
5177 cost_data->cost[where] += retval;
5178
5179 /* Check whether we're doing something other than just a copy loop.
5180 Not all such loops may be profitably vectorized; see
5181 rs6000_finish_cost. */
5182 if ((kind == vec_to_scalar || kind == vec_perm
5183 || kind == vec_promote_demote || kind == vec_construct
5184 || kind == scalar_to_vec)
5185 || (where == vect_body && kind == vector_stmt))
5186 rs6000_vect_nonmem = true;
5187 }
5188
5189 return retval;
5190 }
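/* For example, a vector_stmt with cost 1 and count 1 that lies in an inner
   loop relative to the loop being vectorized contributes 1 * 50 = 50 units
   to the vect_body cost because of the weighting above.  */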
5191
5192 /* Implement targetm.vectorize.finish_cost. */
5193
5194 static void
5195 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5196 unsigned *body_cost, unsigned *epilogue_cost)
5197 {
5198 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5199
5200 if (cost_data->loop_info)
5201 rs6000_density_test (cost_data);
5202
5203 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5204 that require versioning for any reason. The vectorization is at
5205 best a wash inside the loop, and the versioning checks make
5206 profitability highly unlikely and potentially quite harmful. */
5207 if (cost_data->loop_info)
5208 {
5209 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5210 if (!rs6000_vect_nonmem
5211 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5212 && LOOP_REQUIRES_VERSIONING (vec_info))
5213 cost_data->cost[vect_body] += 10000;
5214 }
5215
5216 *prologue_cost = cost_data->cost[vect_prologue];
5217 *body_cost = cost_data->cost[vect_body];
5218 *epilogue_cost = cost_data->cost[vect_epilogue];
5219 }
5220
5221 /* Implement targetm.vectorize.destroy_cost_data. */
5222
5223 static void
5224 rs6000_destroy_cost_data (void *data)
5225 {
5226 free (data);
5227 }
5228
5229 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5230 library with vectorized intrinsics. */
5231
5232 static tree
5233 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5234 tree type_in)
5235 {
5236 char name[32];
5237 const char *suffix = NULL;
5238 tree fntype, new_fndecl, bdecl = NULL_TREE;
5239 int n_args = 1;
5240 const char *bname;
5241 machine_mode el_mode, in_mode;
5242 int n, in_n;
5243
5244 /* Libmass is suitable only for unsafe math, since it does not correctly
5245 support parts of IEEE (such as denormals) with the required precision.
5246 Only support it if we have VSX to use the simd d2 or f4 functions.
5247 XXX: Add variable length support.  */
5248 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5249 return NULL_TREE;
5250
5251 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5252 n = TYPE_VECTOR_SUBPARTS (type_out);
5253 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5254 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5255 if (el_mode != in_mode
5256 || n != in_n)
5257 return NULL_TREE;
5258
5259 switch (fn)
5260 {
5261 CASE_CFN_ATAN2:
5262 CASE_CFN_HYPOT:
5263 CASE_CFN_POW:
5264 n_args = 2;
5265 gcc_fallthrough ();
5266
5267 CASE_CFN_ACOS:
5268 CASE_CFN_ACOSH:
5269 CASE_CFN_ASIN:
5270 CASE_CFN_ASINH:
5271 CASE_CFN_ATAN:
5272 CASE_CFN_ATANH:
5273 CASE_CFN_CBRT:
5274 CASE_CFN_COS:
5275 CASE_CFN_COSH:
5276 CASE_CFN_ERF:
5277 CASE_CFN_ERFC:
5278 CASE_CFN_EXP2:
5279 CASE_CFN_EXP:
5280 CASE_CFN_EXPM1:
5281 CASE_CFN_LGAMMA:
5282 CASE_CFN_LOG10:
5283 CASE_CFN_LOG1P:
5284 CASE_CFN_LOG2:
5285 CASE_CFN_LOG:
5286 CASE_CFN_SIN:
5287 CASE_CFN_SINH:
5288 CASE_CFN_SQRT:
5289 CASE_CFN_TAN:
5290 CASE_CFN_TANH:
5291 if (el_mode == DFmode && n == 2)
5292 {
5293 bdecl = mathfn_built_in (double_type_node, fn);
5294 suffix = "d2"; /* pow -> powd2 */
5295 }
5296 else if (el_mode == SFmode && n == 4)
5297 {
5298 bdecl = mathfn_built_in (float_type_node, fn);
5299 suffix = "4"; /* powf -> powf4 */
5300 }
5301 else
5302 return NULL_TREE;
5303 if (!bdecl)
5304 return NULL_TREE;
5305 break;
5306
5307 default:
5308 return NULL_TREE;
5309 }
5310
5311 gcc_assert (suffix != NULL);
5312 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5313 if (!bname)
5314 return NULL_TREE;
5315
5316 strcpy (name, bname + sizeof ("__builtin_") - 1);
5317 strcat (name, suffix);
5318
5319 if (n_args == 1)
5320 fntype = build_function_type_list (type_out, type_in, NULL);
5321 else if (n_args == 2)
5322 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5323 else
5324 gcc_unreachable ();
5325
5326 /* Build a function declaration for the vectorized function. */
5327 new_fndecl = build_decl (BUILTINS_LOCATION,
5328 FUNCTION_DECL, get_identifier (name), fntype);
5329 TREE_PUBLIC (new_fndecl) = 1;
5330 DECL_EXTERNAL (new_fndecl) = 1;
5331 DECL_IS_NOVOPS (new_fndecl) = 1;
5332 TREE_READONLY (new_fndecl) = 1;
5333
5334 return new_fndecl;
5335 }
5336
5337 /* Returns a function decl for a vectorized version of the builtin function
5338 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5339 if it is not available. */
5340
5341 static tree
5342 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5343 tree type_in)
5344 {
5345 machine_mode in_mode, out_mode;
5346 int in_n, out_n;
5347
5348 if (TARGET_DEBUG_BUILTIN)
5349 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5350 combined_fn_name (combined_fn (fn)),
5351 GET_MODE_NAME (TYPE_MODE (type_out)),
5352 GET_MODE_NAME (TYPE_MODE (type_in)));
5353
5354 if (TREE_CODE (type_out) != VECTOR_TYPE
5355 || TREE_CODE (type_in) != VECTOR_TYPE)
5356 return NULL_TREE;
5357
5358 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5359 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5360 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5361 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5362
5363 switch (fn)
5364 {
5365 CASE_CFN_COPYSIGN:
5366 if (VECTOR_UNIT_VSX_P (V2DFmode)
5367 && out_mode == DFmode && out_n == 2
5368 && in_mode == DFmode && in_n == 2)
5369 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5370 if (VECTOR_UNIT_VSX_P (V4SFmode)
5371 && out_mode == SFmode && out_n == 4
5372 && in_mode == SFmode && in_n == 4)
5373 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5374 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5375 && out_mode == SFmode && out_n == 4
5376 && in_mode == SFmode && in_n == 4)
5377 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5378 break;
5379 CASE_CFN_CEIL:
5380 if (VECTOR_UNIT_VSX_P (V2DFmode)
5381 && out_mode == DFmode && out_n == 2
5382 && in_mode == DFmode && in_n == 2)
5383 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5384 if (VECTOR_UNIT_VSX_P (V4SFmode)
5385 && out_mode == SFmode && out_n == 4
5386 && in_mode == SFmode && in_n == 4)
5387 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5388 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5389 && out_mode == SFmode && out_n == 4
5390 && in_mode == SFmode && in_n == 4)
5391 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5392 break;
5393 CASE_CFN_FLOOR:
5394 if (VECTOR_UNIT_VSX_P (V2DFmode)
5395 && out_mode == DFmode && out_n == 2
5396 && in_mode == DFmode && in_n == 2)
5397 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5398 if (VECTOR_UNIT_VSX_P (V4SFmode)
5399 && out_mode == SFmode && out_n == 4
5400 && in_mode == SFmode && in_n == 4)
5401 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5402 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5403 && out_mode == SFmode && out_n == 4
5404 && in_mode == SFmode && in_n == 4)
5405 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5406 break;
5407 CASE_CFN_FMA:
5408 if (VECTOR_UNIT_VSX_P (V2DFmode)
5409 && out_mode == DFmode && out_n == 2
5410 && in_mode == DFmode && in_n == 2)
5411 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5412 if (VECTOR_UNIT_VSX_P (V4SFmode)
5413 && out_mode == SFmode && out_n == 4
5414 && in_mode == SFmode && in_n == 4)
5415 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5416 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5417 && out_mode == SFmode && out_n == 4
5418 && in_mode == SFmode && in_n == 4)
5419 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5420 break;
5421 CASE_CFN_TRUNC:
5422 if (VECTOR_UNIT_VSX_P (V2DFmode)
5423 && out_mode == DFmode && out_n == 2
5424 && in_mode == DFmode && in_n == 2)
5425 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5426 if (VECTOR_UNIT_VSX_P (V4SFmode)
5427 && out_mode == SFmode && out_n == 4
5428 && in_mode == SFmode && in_n == 4)
5429 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5430 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5431 && out_mode == SFmode && out_n == 4
5432 && in_mode == SFmode && in_n == 4)
5433 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5434 break;
5435 CASE_CFN_NEARBYINT:
5436 if (VECTOR_UNIT_VSX_P (V2DFmode)
5437 && flag_unsafe_math_optimizations
5438 && out_mode == DFmode && out_n == 2
5439 && in_mode == DFmode && in_n == 2)
5440 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5441 if (VECTOR_UNIT_VSX_P (V4SFmode)
5442 && flag_unsafe_math_optimizations
5443 && out_mode == SFmode && out_n == 4
5444 && in_mode == SFmode && in_n == 4)
5445 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5446 break;
5447 CASE_CFN_RINT:
5448 if (VECTOR_UNIT_VSX_P (V2DFmode)
5449 && !flag_trapping_math
5450 && out_mode == DFmode && out_n == 2
5451 && in_mode == DFmode && in_n == 2)
5452 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5453 if (VECTOR_UNIT_VSX_P (V4SFmode)
5454 && !flag_trapping_math
5455 && out_mode == SFmode && out_n == 4
5456 && in_mode == SFmode && in_n == 4)
5457 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5458 break;
5459 default:
5460 break;
5461 }
5462
5463 /* Generate calls to libmass if appropriate. */
5464 if (rs6000_veclib_handler)
5465 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5466
5467 return NULL_TREE;
5468 }
5469
5470 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5471
5472 static tree
5473 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5474 tree type_in)
5475 {
5476 machine_mode in_mode, out_mode;
5477 int in_n, out_n;
5478
5479 if (TARGET_DEBUG_BUILTIN)
5480 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5481 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5482 GET_MODE_NAME (TYPE_MODE (type_out)),
5483 GET_MODE_NAME (TYPE_MODE (type_in)));
5484
5485 if (TREE_CODE (type_out) != VECTOR_TYPE
5486 || TREE_CODE (type_in) != VECTOR_TYPE)
5487 return NULL_TREE;
5488
5489 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5490 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5491 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5492 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5493
5494 enum rs6000_builtins fn
5495 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5496 switch (fn)
5497 {
5498 case RS6000_BUILTIN_RSQRTF:
5499 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5500 && out_mode == SFmode && out_n == 4
5501 && in_mode == SFmode && in_n == 4)
5502 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5503 break;
5504 case RS6000_BUILTIN_RSQRT:
5505 if (VECTOR_UNIT_VSX_P (V2DFmode)
5506 && out_mode == DFmode && out_n == 2
5507 && in_mode == DFmode && in_n == 2)
5508 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5509 break;
5510 case RS6000_BUILTIN_RECIPF:
5511 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5512 && out_mode == SFmode && out_n == 4
5513 && in_mode == SFmode && in_n == 4)
5514 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5515 break;
5516 case RS6000_BUILTIN_RECIP:
5517 if (VECTOR_UNIT_VSX_P (V2DFmode)
5518 && out_mode == DFmode && out_n == 2
5519 && in_mode == DFmode && in_n == 2)
5520 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5521 break;
5522 default:
5523 break;
5524 }
5525 return NULL_TREE;
5526 }
5527 \f
5528 /* Default CPU string for rs6000*_file_start functions. */
5529 static const char *rs6000_default_cpu;
5530
5531 #ifdef USING_ELFOS_H
5532 const char *rs6000_machine;
5533
5534 const char *
5535 rs6000_machine_from_flags (void)
5536 {
5537 if ((rs6000_isa_flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER))
5538 != 0)
5539 return "future";
5540 if ((rs6000_isa_flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5541 return "power9";
5542 if ((rs6000_isa_flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5543 return "power8";
5544 if ((rs6000_isa_flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5545 return "power7";
5546 if ((rs6000_isa_flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5547 return "power6";
5548 if ((rs6000_isa_flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5549 return "power5";
5550 if ((rs6000_isa_flags & ISA_2_1_MASKS) != 0)
5551 return "power4";
5552 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5553 return "ppc64";
5554 return "ppc";
5555 }
5556
5557 void
5558 emit_asm_machine (void)
5559 {
5560 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5561 }
5562 #endif
5563
5564 /* Do anything needed at the start of the asm file. */
5565
5566 static void
5567 rs6000_file_start (void)
5568 {
5569 char buffer[80];
5570 const char *start = buffer;
5571 FILE *file = asm_out_file;
5572
5573 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5574
5575 default_file_start ();
5576
5577 if (flag_verbose_asm)
5578 {
5579 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5580
5581 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5582 {
5583 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5584 start = "";
5585 }
5586
5587 if (global_options_set.x_rs6000_cpu_index)
5588 {
5589 fprintf (file, "%s -mcpu=%s", start,
5590 processor_target_table[rs6000_cpu_index].name);
5591 start = "";
5592 }
5593
5594 if (global_options_set.x_rs6000_tune_index)
5595 {
5596 fprintf (file, "%s -mtune=%s", start,
5597 processor_target_table[rs6000_tune_index].name);
5598 start = "";
5599 }
5600
5601 if (PPC405_ERRATUM77)
5602 {
5603 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5604 start = "";
5605 }
5606
5607 #ifdef USING_ELFOS_H
5608 switch (rs6000_sdata)
5609 {
5610 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5611 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5612 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5613 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5614 }
5615
5616 if (rs6000_sdata && g_switch_value)
5617 {
5618 fprintf (file, "%s -G %d", start,
5619 g_switch_value);
5620 start = "";
5621 }
5622 #endif
5623
5624 if (*start == '\0')
5625 putc ('\n', file);
5626 }
5627
5628 #ifdef USING_ELFOS_H
5629 rs6000_machine = rs6000_machine_from_flags ();
5630 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
5631 && !global_options_set.x_rs6000_cpu_index)
5632 emit_asm_machine ();
5633 #endif
5634
5635 if (DEFAULT_ABI == ABI_ELFv2)
5636 fprintf (file, "\t.abiversion 2\n");
5637 }
5638
5639 \f
5640 /* Return nonzero if this function is known to have a null epilogue. */
5641
5642 int
5643 direct_return (void)
5644 {
5645 if (reload_completed)
5646 {
5647 rs6000_stack_t *info = rs6000_stack_info ();
5648
5649 if (info->first_gp_reg_save == 32
5650 && info->first_fp_reg_save == 64
5651 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5652 && ! info->lr_save_p
5653 && ! info->cr_save_p
5654 && info->vrsave_size == 0
5655 && ! info->push_p)
5656 return 1;
5657 }
5658
5659 return 0;
5660 }
5661
5662 /* Helper for num_insns_constant. Calculate number of instructions to
5663 load VALUE to a single gpr using combinations of addi, addis, ori,
5664 oris and sldi instructions. */
5665
5666 static int
5667 num_insns_constant_gpr (HOST_WIDE_INT value)
5668 {
5669 /* signed constant loadable with addi */
5670 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5671 return 1;
5672
5673 /* constant loadable with addis */
5674 else if ((value & 0xffff) == 0
5675 && (value >> 31 == -1 || value >> 31 == 0))
5676 return 1;
5677
5678 else if (TARGET_POWERPC64)
5679 {
5680 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5681 HOST_WIDE_INT high = value >> 31;
5682
5683 if (high == 0 || high == -1)
5684 return 2;
5685
5686 high >>= 1;
5687
5688 if (low == 0)
5689 return num_insns_constant_gpr (high) + 1;
5690 else if (high == 0)
5691 return num_insns_constant_gpr (low) + 1;
5692 else
5693 return (num_insns_constant_gpr (high)
5694 + num_insns_constant_gpr (low) + 1);
5695 }
5696
5697 else
5698 return 2;
5699 }
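/* As an illustration (assuming TARGET_POWERPC64), the constant
   0x123456789abcdef0 is counted as five instructions: two for the high
   word (lis 0x1234; ori 0x5678), one sldi by 32, and two for the low
   word (oris 0x9abc; ori 0xdef0), i.e. num_insns_constant_gpr (high)
   + num_insns_constant_gpr (low) + 1.  */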
5700
5701 /* Helper for num_insns_constant. Allow constants formed by the
5702 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5703 and handle modes that require multiple gprs. */
5704
5705 static int
5706 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5707 {
5708 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5709 int total = 0;
5710 while (nregs-- > 0)
5711 {
5712 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5713 int insns = num_insns_constant_gpr (low);
5714 if (insns > 2
5715 /* We won't get more than 2 from num_insns_constant_gpr
5716 except when TARGET_POWERPC64 and mode is DImode or
5717 wider, so the register mode must be DImode. */
5718 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5719 insns = 2;
5720 total += insns;
5721 value >>= BITS_PER_WORD;
5722 }
5723 return total;
5724 }
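/* For example, the DImode constant 0x7fffffffffffffff would be counted as
   four instructions by num_insns_constant_gpr, but it is a contiguous AND
   mask, so it is counted as the two-instruction sequence
   "li rT,-1; rldicl rT,rT,0,1" instead.  */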
5725
5726 /* Return the number of instructions it takes to form a constant in as
5727 many gprs as are needed for MODE.  */
5728
5729 int
5730 num_insns_constant (rtx op, machine_mode mode)
5731 {
5732 HOST_WIDE_INT val;
5733
5734 switch (GET_CODE (op))
5735 {
5736 case CONST_INT:
5737 val = INTVAL (op);
5738 break;
5739
5740 case CONST_WIDE_INT:
5741 {
5742 int insns = 0;
5743 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5744 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5745 DImode);
5746 return insns;
5747 }
5748
5749 case CONST_DOUBLE:
5750 {
5751 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5752
5753 if (mode == SFmode || mode == SDmode)
5754 {
5755 long l;
5756
5757 if (mode == SDmode)
5758 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5759 else
5760 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5761 /* See the first define_split in rs6000.md handling a
5762 const_double_operand. */
5763 val = l;
5764 mode = SImode;
5765 }
5766 else if (mode == DFmode || mode == DDmode)
5767 {
5768 long l[2];
5769
5770 if (mode == DDmode)
5771 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5772 else
5773 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5774
5775 /* See the second (32-bit) and third (64-bit) define_split
5776 in rs6000.md handling a const_double_operand. */
5777 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5778 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5779 mode = DImode;
5780 }
5781 else if (mode == TFmode || mode == TDmode
5782 || mode == KFmode || mode == IFmode)
5783 {
5784 long l[4];
5785 int insns;
5786
5787 if (mode == TDmode)
5788 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5789 else
5790 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5791
5792 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5793 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5794 insns = num_insns_constant_multi (val, DImode);
5795 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5796 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5797 insns += num_insns_constant_multi (val, DImode);
5798 return insns;
5799 }
5800 else
5801 gcc_unreachable ();
5802 }
5803 break;
5804
5805 default:
5806 gcc_unreachable ();
5807 }
5808
5809 return num_insns_constant_multi (val, mode);
5810 }
5811
5812 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5813 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5814 corresponding element of the vector, but for V4SFmode, the
5815 corresponding "float" is interpreted as an SImode integer. */
5816
5817 HOST_WIDE_INT
5818 const_vector_elt_as_int (rtx op, unsigned int elt)
5819 {
5820 rtx tmp;
5821
5822 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5823 gcc_assert (GET_MODE (op) != V2DImode
5824 && GET_MODE (op) != V2DFmode);
5825
5826 tmp = CONST_VECTOR_ELT (op, elt);
5827 if (GET_MODE (op) == V4SFmode)
5828 tmp = gen_lowpart (SImode, tmp);
5829 return INTVAL (tmp);
5830 }
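/* E.g., a V4SFmode element holding 1.0f is returned as the SImode integer
   0x3f800000, its IEEE single precision bit pattern.  */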
5831
5832 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5833 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5834 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5835 all items are set to the same value and contain COPIES replicas of the
5836 vsplt's operand; if STEP > 1, every STEP-th element is set to the vsplt's
5837 operand and the others are set to the value of the operand's msb. */
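/* For instance, on a big-endian target the V8HImode constant
   { 0, 5, 0, 5, 0, 5, 0, 5 } is recognized with STEP == 2 (it is really a
   vspltisw 5), while a V8HImode constant with every element 0x0303 is
   recognized with COPIES == 2 (it is really a vspltisb 3).  */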
5838
5839 static bool
5840 vspltis_constant (rtx op, unsigned step, unsigned copies)
5841 {
5842 machine_mode mode = GET_MODE (op);
5843 machine_mode inner = GET_MODE_INNER (mode);
5844
5845 unsigned i;
5846 unsigned nunits;
5847 unsigned bitsize;
5848 unsigned mask;
5849
5850 HOST_WIDE_INT val;
5851 HOST_WIDE_INT splat_val;
5852 HOST_WIDE_INT msb_val;
5853
5854 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5855 return false;
5856
5857 nunits = GET_MODE_NUNITS (mode);
5858 bitsize = GET_MODE_BITSIZE (inner);
5859 mask = GET_MODE_MASK (inner);
5860
5861 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5862 splat_val = val;
5863 msb_val = val >= 0 ? 0 : -1;
5864
5865 /* Construct the value to be splatted, if possible. If not, return 0. */
5866 for (i = 2; i <= copies; i *= 2)
5867 {
5868 HOST_WIDE_INT small_val;
5869 bitsize /= 2;
5870 small_val = splat_val >> bitsize;
5871 mask >>= bitsize;
5872 if (splat_val != ((HOST_WIDE_INT)
5873 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5874 | (small_val & mask)))
5875 return false;
5876 splat_val = small_val;
5877 }
5878
5879 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5880 if (EASY_VECTOR_15 (splat_val))
5881 ;
5882
5883 /* Also check if we can splat, and then add the result to itself. Do so if
5884 the value is positive, or if the splat instruction is using OP's mode;
5885 for splat_val < 0, the splat and the add should use the same mode. */
5886 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5887 && (splat_val >= 0 || (step == 1 && copies == 1)))
5888 ;
5889
5890 /* Also check if we are loading up the most significant bit which can be done
5891 by loading up -1 and shifting the value left by -1.  */
5892 else if (EASY_VECTOR_MSB (splat_val, inner))
5893 ;
5894
5895 else
5896 return false;
5897
5898 /* Check if VAL is present in every STEP-th element, and the
5899 other elements are filled with its most significant bit. */
5900 for (i = 1; i < nunits; ++i)
5901 {
5902 HOST_WIDE_INT desired_val;
5903 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5904 if ((i & (step - 1)) == 0)
5905 desired_val = val;
5906 else
5907 desired_val = msb_val;
5908
5909 if (desired_val != const_vector_elt_as_int (op, elt))
5910 return false;
5911 }
5912
5913 return true;
5914 }
5915
5916 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5917 instruction, filling in the bottom elements with 0 or -1.
5918
5919 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5920 for the number of zeroes to shift in, or negative for the number of 0xff
5921 bytes to shift in.
5922
5923 OP is a CONST_VECTOR. */
5924
5925 int
5926 vspltis_shifted (rtx op)
5927 {
5928 machine_mode mode = GET_MODE (op);
5929 machine_mode inner = GET_MODE_INNER (mode);
5930
5931 unsigned i, j;
5932 unsigned nunits;
5933 unsigned mask;
5934
5935 HOST_WIDE_INT val;
5936
5937 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5938 return 0;
5939
5940 /* We need to create pseudo registers to do the shift, so don't recognize
5941 shift vector constants after reload. */
5942 if (!can_create_pseudo_p ())
5943 return 0;
5944
5945 nunits = GET_MODE_NUNITS (mode);
5946 mask = GET_MODE_MASK (inner);
5947
5948 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5949
5950 /* Check if the value can really be the operand of a vspltis[bhw]. */
5951 if (EASY_VECTOR_15 (val))
5952 ;
5953
5954 /* Also check if we are loading up the most significant bit which can be done
5955 by loading up -1 and shifting the value left by -1. */
5956 else if (EASY_VECTOR_MSB (val, inner))
5957 ;
5958
5959 else
5960 return 0;
5961
5962 /* Check if VAL is present in every element until we find elements
5963 that are 0 or all 1 bits. */
5964 for (i = 1; i < nunits; ++i)
5965 {
5966 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5967 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5968
5969 /* If the value isn't the splat value, check for the remaining elements
5970 being 0/-1. */
5971 if (val != elt_val)
5972 {
5973 if (elt_val == 0)
5974 {
5975 for (j = i+1; j < nunits; ++j)
5976 {
5977 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5978 if (const_vector_elt_as_int (op, elt2) != 0)
5979 return 0;
5980 }
5981
5982 return (nunits - i) * GET_MODE_SIZE (inner);
5983 }
5984
5985 else if ((elt_val & mask) == mask)
5986 {
5987 for (j = i+1; j < nunits; ++j)
5988 {
5989 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5990 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5991 return 0;
5992 }
5993
5994 return -((nunits - i) * GET_MODE_SIZE (inner));
5995 }
5996
5997 else
5998 return 0;
5999 }
6000 }
6001
6002 /* If all elements are equal, we don't need to do VSLDOI.  */
6003 return 0;
6004 }
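/* For example, the big-endian V4SImode constant { 5, 0, 0, 0 } returns 12
   (vspltisw 5, then VSLDOI shifts in 12 zero bytes), and { 5, -1, -1, -1 }
   returns -12.  */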
6005
6006
6007 /* Return true if OP is of the given MODE and can be synthesized
6008 with a vspltisb, vspltish or vspltisw. */
6009
6010 bool
6011 easy_altivec_constant (rtx op, machine_mode mode)
6012 {
6013 unsigned step, copies;
6014
6015 if (mode == VOIDmode)
6016 mode = GET_MODE (op);
6017 else if (mode != GET_MODE (op))
6018 return false;
6019
6020 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6021 constants. */
6022 if (mode == V2DFmode)
6023 return zero_constant (op, mode);
6024
6025 else if (mode == V2DImode)
6026 {
6027 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6028 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6029 return false;
6030
6031 if (zero_constant (op, mode))
6032 return true;
6033
6034 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6035 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6036 return true;
6037
6038 return false;
6039 }
6040
6041 /* V1TImode is a special container for TImode. Ignore for now. */
6042 else if (mode == V1TImode)
6043 return false;
6044
6045 /* Start with a vspltisw. */
6046 step = GET_MODE_NUNITS (mode) / 4;
6047 copies = 1;
6048
6049 if (vspltis_constant (op, step, copies))
6050 return true;
6051
6052 /* Then try with a vspltish. */
6053 if (step == 1)
6054 copies <<= 1;
6055 else
6056 step >>= 1;
6057
6058 if (vspltis_constant (op, step, copies))
6059 return true;
6060
6061 /* And finally a vspltisb. */
6062 if (step == 1)
6063 copies <<= 1;
6064 else
6065 step >>= 1;
6066
6067 if (vspltis_constant (op, step, copies))
6068 return true;
6069
6070 if (vspltis_shifted (op) != 0)
6071 return true;
6072
6073 return false;
6074 }
6075
6076 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6077 result is OP. Abort if it is not possible. */
6078
6079 rtx
6080 gen_easy_altivec_constant (rtx op)
6081 {
6082 machine_mode mode = GET_MODE (op);
6083 int nunits = GET_MODE_NUNITS (mode);
6084 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6085 unsigned step = nunits / 4;
6086 unsigned copies = 1;
6087
6088 /* Start with a vspltisw. */
6089 if (vspltis_constant (op, step, copies))
6090 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6091
6092 /* Then try with a vspltish. */
6093 if (step == 1)
6094 copies <<= 1;
6095 else
6096 step >>= 1;
6097
6098 if (vspltis_constant (op, step, copies))
6099 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6100
6101 /* And finally a vspltisb. */
6102 if (step == 1)
6103 copies <<= 1;
6104 else
6105 step >>= 1;
6106
6107 if (vspltis_constant (op, step, copies))
6108 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6109
6110 gcc_unreachable ();
6111 }
6112
6113 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6114 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6115
6116 Return the number of instructions needed (1 or 2) via the address pointed
6117 to by NUM_INSNS_PTR.
6118
6119 Return the constant that is being split via CONSTANT_PTR. */
6120
6121 bool
6122 xxspltib_constant_p (rtx op,
6123 machine_mode mode,
6124 int *num_insns_ptr,
6125 int *constant_ptr)
6126 {
6127 size_t nunits = GET_MODE_NUNITS (mode);
6128 size_t i;
6129 HOST_WIDE_INT value;
6130 rtx element;
6131
6132 /* Set the returned values to out-of-bounds values.  */
6133 *num_insns_ptr = -1;
6134 *constant_ptr = 256;
6135
6136 if (!TARGET_P9_VECTOR)
6137 return false;
6138
6139 if (mode == VOIDmode)
6140 mode = GET_MODE (op);
6141
6142 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6143 return false;
6144
6145 /* Handle (vec_duplicate <constant>). */
6146 if (GET_CODE (op) == VEC_DUPLICATE)
6147 {
6148 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6149 && mode != V2DImode)
6150 return false;
6151
6152 element = XEXP (op, 0);
6153 if (!CONST_INT_P (element))
6154 return false;
6155
6156 value = INTVAL (element);
6157 if (!IN_RANGE (value, -128, 127))
6158 return false;
6159 }
6160
6161 /* Handle (const_vector [...]). */
6162 else if (GET_CODE (op) == CONST_VECTOR)
6163 {
6164 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6165 && mode != V2DImode)
6166 return false;
6167
6168 element = CONST_VECTOR_ELT (op, 0);
6169 if (!CONST_INT_P (element))
6170 return false;
6171
6172 value = INTVAL (element);
6173 if (!IN_RANGE (value, -128, 127))
6174 return false;
6175
6176 for (i = 1; i < nunits; i++)
6177 {
6178 element = CONST_VECTOR_ELT (op, i);
6179 if (!CONST_INT_P (element))
6180 return false;
6181
6182 if (value != INTVAL (element))
6183 return false;
6184 }
6185 }
6186
6187 /* Handle integer constants being loaded into the upper part of the VSX
6188 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6189 can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
6190 else if (CONST_INT_P (op))
6191 {
6192 if (!SCALAR_INT_MODE_P (mode))
6193 return false;
6194
6195 value = INTVAL (op);
6196 if (!IN_RANGE (value, -128, 127))
6197 return false;
6198
6199 if (!IN_RANGE (value, -1, 0))
6200 {
6201 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6202 return false;
6203
6204 if (EASY_VECTOR_15 (value))
6205 return false;
6206 }
6207 }
6208
6209 else
6210 return false;
6211
6212 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6213 sign extend. Special case 0/-1 to allow getting any VSX register instead
6214 of an Altivec register. */
6215 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6216 && EASY_VECTOR_15 (value))
6217 return false;
6218
6219 /* Return # of instructions and the constant byte for XXSPLTIB. */
6220 if (mode == V16QImode)
6221 *num_insns_ptr = 1;
6222
6223 else if (IN_RANGE (value, -1, 0))
6224 *num_insns_ptr = 1;
6225
6226 else
6227 *num_insns_ptr = 2;
6228
6229 *constant_ptr = (int) value;
6230 return true;
6231 }
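/* For example, a V16QImode splat of 64 is a single xxspltib 64, while a
   V4SImode splat of 64 is counted as two instructions, an xxspltib
   followed by a vextsb2w sign extension.  */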
6232
6233 const char *
6234 output_vec_const_move (rtx *operands)
6235 {
6236 int shift;
6237 machine_mode mode;
6238 rtx dest, vec;
6239
6240 dest = operands[0];
6241 vec = operands[1];
6242 mode = GET_MODE (dest);
6243
6244 if (TARGET_VSX)
6245 {
6246 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6247 int xxspltib_value = 256;
6248 int num_insns = -1;
6249
6250 if (zero_constant (vec, mode))
6251 {
6252 if (TARGET_P9_VECTOR)
6253 return "xxspltib %x0,0";
6254
6255 else if (dest_vmx_p)
6256 return "vspltisw %0,0";
6257
6258 else
6259 return "xxlxor %x0,%x0,%x0";
6260 }
6261
6262 if (all_ones_constant (vec, mode))
6263 {
6264 if (TARGET_P9_VECTOR)
6265 return "xxspltib %x0,255";
6266
6267 else if (dest_vmx_p)
6268 return "vspltisw %0,-1";
6269
6270 else if (TARGET_P8_VECTOR)
6271 return "xxlorc %x0,%x0,%x0";
6272
6273 else
6274 gcc_unreachable ();
6275 }
6276
6277 if (TARGET_P9_VECTOR
6278 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6279 {
6280 if (num_insns == 1)
6281 {
6282 operands[2] = GEN_INT (xxspltib_value & 0xff);
6283 return "xxspltib %x0,%2";
6284 }
6285
6286 return "#";
6287 }
6288 }
6289
6290 if (TARGET_ALTIVEC)
6291 {
6292 rtx splat_vec;
6293
6294 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6295 if (zero_constant (vec, mode))
6296 return "vspltisw %0,0";
6297
6298 if (all_ones_constant (vec, mode))
6299 return "vspltisw %0,-1";
6300
6301 /* Do we need to construct a value using VSLDOI? */
6302 shift = vspltis_shifted (vec);
6303 if (shift != 0)
6304 return "#";
6305
6306 splat_vec = gen_easy_altivec_constant (vec);
6307 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6308 operands[1] = XEXP (splat_vec, 0);
6309 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6310 return "#";
6311
6312 switch (GET_MODE (splat_vec))
6313 {
6314 case E_V4SImode:
6315 return "vspltisw %0,%1";
6316
6317 case E_V8HImode:
6318 return "vspltish %0,%1";
6319
6320 case E_V16QImode:
6321 return "vspltisb %0,%1";
6322
6323 default:
6324 gcc_unreachable ();
6325 }
6326 }
6327
6328 gcc_unreachable ();
6329 }
6330
6331 /* Initialize vector TARGET to VALS. */
6332
6333 void
6334 rs6000_expand_vector_init (rtx target, rtx vals)
6335 {
6336 machine_mode mode = GET_MODE (target);
6337 machine_mode inner_mode = GET_MODE_INNER (mode);
6338 int n_elts = GET_MODE_NUNITS (mode);
6339 int n_var = 0, one_var = -1;
6340 bool all_same = true, all_const_zero = true;
6341 rtx x, mem;
6342 int i;
6343
6344 for (i = 0; i < n_elts; ++i)
6345 {
6346 x = XVECEXP (vals, 0, i);
6347 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6348 ++n_var, one_var = i;
6349 else if (x != CONST0_RTX (inner_mode))
6350 all_const_zero = false;
6351
6352 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6353 all_same = false;
6354 }
6355
6356 if (n_var == 0)
6357 {
6358 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6359 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6360 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6361 {
6362 /* Zero register. */
6363 emit_move_insn (target, CONST0_RTX (mode));
6364 return;
6365 }
6366 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6367 {
6368 /* Splat immediate. */
6369 emit_insn (gen_rtx_SET (target, const_vec));
6370 return;
6371 }
6372 else
6373 {
6374 /* Load from constant pool. */
6375 emit_move_insn (target, const_vec);
6376 return;
6377 }
6378 }
6379
6380 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6381 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6382 {
6383 rtx op[2];
6384 size_t i;
6385 size_t num_elements = all_same ? 1 : 2;
6386 for (i = 0; i < num_elements; i++)
6387 {
6388 op[i] = XVECEXP (vals, 0, i);
6389 /* Just in case there is a SUBREG with a smaller mode, do a
6390 conversion. */
6391 if (GET_MODE (op[i]) != inner_mode)
6392 {
6393 rtx tmp = gen_reg_rtx (inner_mode);
6394 convert_move (tmp, op[i], 0);
6395 op[i] = tmp;
6396 }
6397 /* Allow load with splat double word. */
6398 else if (MEM_P (op[i]))
6399 {
6400 if (!all_same)
6401 op[i] = force_reg (inner_mode, op[i]);
6402 }
6403 else if (!REG_P (op[i]))
6404 op[i] = force_reg (inner_mode, op[i]);
6405 }
6406
6407 if (all_same)
6408 {
6409 if (mode == V2DFmode)
6410 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6411 else
6412 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6413 }
6414 else
6415 {
6416 if (mode == V2DFmode)
6417 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6418 else
6419 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6420 }
6421 return;
6422 }
6423
6424 /* Special case initializing vector int if we are on 64-bit systems with
6425 direct move or we have the ISA 3.0 instructions. */
6426 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6427 && TARGET_DIRECT_MOVE_64BIT)
6428 {
6429 if (all_same)
6430 {
6431 rtx element0 = XVECEXP (vals, 0, 0);
6432 if (MEM_P (element0))
6433 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6434 else
6435 element0 = force_reg (SImode, element0);
6436
6437 if (TARGET_P9_VECTOR)
6438 emit_insn (gen_vsx_splat_v4si (target, element0));
6439 else
6440 {
6441 rtx tmp = gen_reg_rtx (DImode);
6442 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6443 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6444 }
6445 return;
6446 }
6447 else
6448 {
6449 rtx elements[4];
6450 size_t i;
6451
6452 for (i = 0; i < 4; i++)
6453 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6454
6455 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6456 elements[2], elements[3]));
6457 return;
6458 }
6459 }
6460
6461 /* With single precision floating point on VSX, we know that internally
6462 single precision is actually represented as a double, so either make 2
6463 V2DF vectors and convert those vectors to single precision, or do one
6464 conversion and splat the result to the other elements.  */
6465 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6466 {
6467 if (all_same)
6468 {
6469 rtx element0 = XVECEXP (vals, 0, 0);
6470
6471 if (TARGET_P9_VECTOR)
6472 {
6473 if (MEM_P (element0))
6474 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6475
6476 emit_insn (gen_vsx_splat_v4sf (target, element0));
6477 }
6478
6479 else
6480 {
6481 rtx freg = gen_reg_rtx (V4SFmode);
6482 rtx sreg = force_reg (SFmode, element0);
6483 rtx cvt = (TARGET_XSCVDPSPN
6484 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6485 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6486
6487 emit_insn (cvt);
6488 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6489 const0_rtx));
6490 }
6491 }
6492 else
6493 {
6494 rtx dbl_even = gen_reg_rtx (V2DFmode);
6495 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6496 rtx flt_even = gen_reg_rtx (V4SFmode);
6497 rtx flt_odd = gen_reg_rtx (V4SFmode);
6498 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6499 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6500 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6501 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6502
6503 /* Use VMRGEW if we can instead of doing a permute. */
6504 if (TARGET_P8_VECTOR)
6505 {
6506 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6507 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6508 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6509 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6510 if (BYTES_BIG_ENDIAN)
6511 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6512 else
6513 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6514 }
6515 else
6516 {
6517 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6518 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6519 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6520 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6521 rs6000_expand_extract_even (target, flt_even, flt_odd);
6522 }
6523 }
6524 return;
6525 }
6526
6527 /* Special case initializing vector short/char that are splats if we are on
6528 64-bit systems with direct move. */
6529 if (all_same && TARGET_DIRECT_MOVE_64BIT
6530 && (mode == V16QImode || mode == V8HImode))
6531 {
6532 rtx op0 = XVECEXP (vals, 0, 0);
6533 rtx di_tmp = gen_reg_rtx (DImode);
6534
6535 if (!REG_P (op0))
6536 op0 = force_reg (GET_MODE_INNER (mode), op0);
6537
6538 if (mode == V16QImode)
6539 {
6540 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6541 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6542 return;
6543 }
6544
6545 if (mode == V8HImode)
6546 {
6547 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6548 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6549 return;
6550 }
6551 }
6552
6553 /* Store value to stack temp. Load vector element. Splat. However, splat
6554 of 64-bit items is not supported on Altivec. */
6555 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6556 {
6557 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6558 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6559 XVECEXP (vals, 0, 0));
6560 x = gen_rtx_UNSPEC (VOIDmode,
6561 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6562 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6563 gen_rtvec (2,
6564 gen_rtx_SET (target, mem),
6565 x)));
6566 x = gen_rtx_VEC_SELECT (inner_mode, target,
6567 gen_rtx_PARALLEL (VOIDmode,
6568 gen_rtvec (1, const0_rtx)));
6569 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6570 return;
6571 }
6572
6573 /* One field is non-constant. Load constant then overwrite
6574 varying field. */
6575 if (n_var == 1)
6576 {
6577 rtx copy = copy_rtx (vals);
6578
6579 /* Load constant part of vector, substitute neighboring value for
6580 varying element. */
6581 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6582 rs6000_expand_vector_init (target, copy);
6583
6584 /* Insert variable. */
6585 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6586 return;
6587 }
6588
6589 /* Construct the vector in memory one field at a time
6590 and load the whole vector. */
6591 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6592 for (i = 0; i < n_elts; i++)
6593 emit_move_insn (adjust_address_nv (mem, inner_mode,
6594 i * GET_MODE_SIZE (inner_mode)),
6595 XVECEXP (vals, 0, i));
6596 emit_move_insn (target, mem);
6597 }
6598
6599 /* Set field ELT of TARGET to VAL. */
6600
6601 void
6602 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6603 {
6604 machine_mode mode = GET_MODE (target);
6605 machine_mode inner_mode = GET_MODE_INNER (mode);
6606 rtx reg = gen_reg_rtx (mode);
6607 rtx mask, mem, x;
6608 int width = GET_MODE_SIZE (inner_mode);
6609 int i;
6610
6611 val = force_reg (GET_MODE (val), val);
6612
6613 if (VECTOR_MEM_VSX_P (mode))
6614 {
6615 rtx insn = NULL_RTX;
6616 rtx elt_rtx = GEN_INT (elt);
6617
6618 if (mode == V2DFmode)
6619 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6620
6621 else if (mode == V2DImode)
6622 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6623
6624 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6625 {
6626 if (mode == V4SImode)
6627 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6628 else if (mode == V8HImode)
6629 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6630 else if (mode == V16QImode)
6631 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6632 else if (mode == V4SFmode)
6633 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6634 }
6635
6636 if (insn)
6637 {
6638 emit_insn (insn);
6639 return;
6640 }
6641 }
6642
6643 /* Simplify setting single element vectors like V1TImode. */
6644 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6645 {
6646 emit_move_insn (target, gen_lowpart (mode, val));
6647 return;
6648 }
6649
6650 /* Load single variable value. */
6651 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6652 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6653 x = gen_rtx_UNSPEC (VOIDmode,
6654 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6655 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6656 gen_rtvec (2,
6657 gen_rtx_SET (reg, mem),
6658 x)));
6659
6660 /* Linear sequence. */
6661 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6662 for (i = 0; i < 16; ++i)
6663 XVECEXP (mask, 0, i) = GEN_INT (i);
6664
6665 /* Set permute mask to insert element into target. */
6666 for (i = 0; i < width; ++i)
6667 XVECEXP (mask, 0, elt*width + i)
6668 = GEN_INT (i + 0x10);
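  /* E.g., for ELT == 2 in V4SImode the selector becomes
     { 0 ... 7, 16, 17, 18, 19, 12 ... 15 }, so bytes 8..11 of the result
     are taken from REG, which holds the new value.  */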
6669 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6670
6671 if (BYTES_BIG_ENDIAN)
6672 x = gen_rtx_UNSPEC (mode,
6673 gen_rtvec (3, target, reg,
6674 force_reg (V16QImode, x)),
6675 UNSPEC_VPERM);
6676 else
6677 {
6678 if (TARGET_P9_VECTOR)
6679 x = gen_rtx_UNSPEC (mode,
6680 gen_rtvec (3, reg, target,
6681 force_reg (V16QImode, x)),
6682 UNSPEC_VPERMR);
6683 else
6684 {
6685 /* Invert selector. We prefer to generate VNAND on P8 so
6686 that future fusion opportunities can kick in, but must
6687 generate VNOR elsewhere. */
6688 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6689 rtx iorx = (TARGET_P8_VECTOR
6690 ? gen_rtx_IOR (V16QImode, notx, notx)
6691 : gen_rtx_AND (V16QImode, notx, notx));
6692 rtx tmp = gen_reg_rtx (V16QImode);
6693 emit_insn (gen_rtx_SET (tmp, iorx));
6694
6695 /* Permute with operands reversed and adjusted selector. */
6696 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6697 UNSPEC_VPERM);
6698 }
6699 }
6700
6701 emit_insn (gen_rtx_SET (target, x));
6702 }
6703
6704 /* Extract field ELT from VEC into TARGET. */
6705
6706 void
6707 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6708 {
6709 machine_mode mode = GET_MODE (vec);
6710 machine_mode inner_mode = GET_MODE_INNER (mode);
6711 rtx mem;
6712
6713 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6714 {
6715 switch (mode)
6716 {
6717 default:
6718 break;
6719 case E_V1TImode:
6720 emit_move_insn (target, gen_lowpart (TImode, vec));
6721 break;
6722 case E_V2DFmode:
6723 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6724 return;
6725 case E_V2DImode:
6726 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6727 return;
6728 case E_V4SFmode:
6729 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6730 return;
6731 case E_V16QImode:
6732 if (TARGET_DIRECT_MOVE_64BIT)
6733 {
6734 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6735 return;
6736 }
6737 else
6738 break;
6739 case E_V8HImode:
6740 if (TARGET_DIRECT_MOVE_64BIT)
6741 {
6742 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6743 return;
6744 }
6745 else
6746 break;
6747 case E_V4SImode:
6748 if (TARGET_DIRECT_MOVE_64BIT)
6749 {
6750 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6751 return;
6752 }
6753 break;
6754 }
6755 }
6756 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6757 && TARGET_DIRECT_MOVE_64BIT)
6758 {
6759 if (GET_MODE (elt) != DImode)
6760 {
6761 rtx tmp = gen_reg_rtx (DImode);
6762 convert_move (tmp, elt, 0);
6763 elt = tmp;
6764 }
6765 else if (!REG_P (elt))
6766 elt = force_reg (DImode, elt);
6767
6768 switch (mode)
6769 {
6770 case E_V1TImode:
6771 emit_move_insn (target, gen_lowpart (TImode, vec));
6772 return;
6773
6774 case E_V2DFmode:
6775 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6776 return;
6777
6778 case E_V2DImode:
6779 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6780 return;
6781
6782 case E_V4SFmode:
6783 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6784 return;
6785
6786 case E_V4SImode:
6787 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6788 return;
6789
6790 case E_V8HImode:
6791 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6792 return;
6793
6794 case E_V16QImode:
6795 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6796 return;
6797
6798 default:
6799 gcc_unreachable ();
6800 }
6801 }
6802
6803 /* Allocate mode-sized buffer. */
6804 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6805
6806 emit_move_insn (mem, vec);
6807 if (CONST_INT_P (elt))
6808 {
6809 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6810
6811 /* Add offset to field within buffer matching vector element. */
6812 mem = adjust_address_nv (mem, inner_mode,
6813 modulo_elt * GET_MODE_SIZE (inner_mode));
6814 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6815 }
6816 else
6817 {
6818 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6819 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6820 rtx new_addr = gen_reg_rtx (Pmode);
6821
6822 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6823 if (ele_size > 1)
6824 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6825 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6826 new_addr = change_address (mem, inner_mode, new_addr);
6827 emit_move_insn (target, new_addr);
6828 }
6829 }
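/* E.g., extracting a variable element IDX from a V8HImode vector via the
   stack buffer above loads the halfword at buf + (IDX & 7) * 2.  */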
6830
6831 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6832 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6833 temporary (BASE_TMP) to fix up the address.  Return the new memory address
6834 that is valid for reads or writes to a given register (SCALAR_REG). */
6835
6836 rtx
6837 rs6000_adjust_vec_address (rtx scalar_reg,
6838 rtx mem,
6839 rtx element,
6840 rtx base_tmp,
6841 machine_mode scalar_mode)
6842 {
6843 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6844 rtx addr = XEXP (mem, 0);
6845 rtx element_offset;
6846 rtx new_addr;
6847 bool valid_addr_p;
6848
6849 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6850 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6851
6852 /* Calculate what we need to add to the address to get the element
6853 address. */
6854 if (CONST_INT_P (element))
6855 element_offset = GEN_INT (INTVAL (element) * scalar_size);
6856 else
6857 {
6858 int byte_shift = exact_log2 (scalar_size);
6859 gcc_assert (byte_shift >= 0);
6860
6861 if (byte_shift == 0)
6862 element_offset = element;
6863
6864 else
6865 {
6866 if (TARGET_POWERPC64)
6867 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
6868 else
6869 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
6870
6871 element_offset = base_tmp;
6872 }
6873 }
6874
6875 /* Create the new address pointing to the element within the vector. If we
6876 are adding 0, we don't have to change the address. */
6877 if (element_offset == const0_rtx)
6878 new_addr = addr;
6879
6880 /* A simple indirect address can be converted into a reg + offset
6881 address. */
6882 else if (REG_P (addr) || SUBREG_P (addr))
6883 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6884
6885 /* Optimize D-FORM addresses with a constant offset and a constant element
6886 number, folding the element offset into the address directly.  */
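/* Illustrative example (not in the original source): extracting constant
   element 3 of a V4SImode vector addressed as base + 16 gives a combined
   offset of 16 + 3*4 = 28, which fits the signed 16-bit D-form displacement
   and is folded directly into the address; 8-byte scalars additionally
   require the combined offset to be a multiple of 4 (DS-form).  */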
6887 else if (GET_CODE (addr) == PLUS)
6888 {
6889 rtx op0 = XEXP (addr, 0);
6890 rtx op1 = XEXP (addr, 1);
6891 rtx insn;
6892
6893 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6894 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6895 {
6896 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6897 rtx offset_rtx = GEN_INT (offset);
6898
6899 if (IN_RANGE (offset, -32768, 32767)
6900 && (scalar_size < 8 || (offset & 0x3) == 0))
6901 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6902 else
6903 {
6904 emit_move_insn (base_tmp, offset_rtx);
6905 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6906 }
6907 }
6908 else
6909 {
6910 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
6911 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
6912
6913 /* Note, ADDI requires the register being added to be a base
6914 register. If the register was R0, load it up into the temporary
6915 and do the add. */
6916 if (op1_reg_p
6917 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
6918 {
6919 insn = gen_add3_insn (base_tmp, op1, element_offset);
6920 gcc_assert (insn != NULL_RTX);
6921 emit_insn (insn);
6922 }
6923
6924 else if (ele_reg_p
6925 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
6926 {
6927 insn = gen_add3_insn (base_tmp, element_offset, op1);
6928 gcc_assert (insn != NULL_RTX);
6929 emit_insn (insn);
6930 }
6931
6932 else
6933 {
6934 emit_move_insn (base_tmp, op1);
6935 emit_insn (gen_add2_insn (base_tmp, element_offset));
6936 }
6937
6938 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6939 }
6940 }
6941
6942 else
6943 {
6944 emit_move_insn (base_tmp, addr);
6945 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6946 }
6947
6948 /* If we have a PLUS, we need to see whether the particular register class
6949 allows for D-FORM or X-FORM addressing. */
6950 if (GET_CODE (new_addr) == PLUS)
6951 {
6952 rtx op1 = XEXP (new_addr, 1);
6953 addr_mask_type addr_mask;
6954 unsigned int scalar_regno = reg_or_subregno (scalar_reg);
6955
6956 gcc_assert (HARD_REGISTER_NUM_P (scalar_regno));
6957 if (INT_REGNO_P (scalar_regno))
6958 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
6959
6960 else if (FP_REGNO_P (scalar_regno))
6961 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
6962
6963 else if (ALTIVEC_REGNO_P (scalar_regno))
6964 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
6965
6966 else
6967 gcc_unreachable ();
6968
6969 if (REG_P (op1) || SUBREG_P (op1))
6970 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
6971 else
6972 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
6973 }
6974
6975 else if (REG_P (new_addr) || SUBREG_P (new_addr))
6976 valid_addr_p = true;
6977
6978 else
6979 valid_addr_p = false;
6980
6981 if (!valid_addr_p)
6982 {
6983 emit_move_insn (base_tmp, new_addr);
6984 new_addr = base_tmp;
6985 }
6986
6987 return change_address (mem, scalar_mode, new_addr);
6988 }
6989
6990 /* Split a variable vec_extract operation into the component instructions. */
6991
6992 void
6993 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6994 rtx tmp_altivec)
6995 {
6996 machine_mode mode = GET_MODE (src);
6997 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6998 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6999 int byte_shift = exact_log2 (scalar_size);
7000
7001 gcc_assert (byte_shift >= 0);
7002
7003 /* If we are given a memory address, optimize to load just the element. We
7004 don't have to adjust the vector element number on little endian
7005 systems. */
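/* Illustrative example (not in the original source): for a V4SImode vector
   in memory the variable index is masked with NUNITS - 1 = 3 below, so an
   out-of-range index such as 7 selects element 3; the masked index then
   feeds rs6000_adjust_vec_address to form the scalar load address.  */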
7006 if (MEM_P (src))
7007 {
7008 int num_elements = GET_MODE_NUNITS (mode);
7009 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7010
7011 emit_insn (gen_anddi3 (element, element, num_ele_m1));
7012 gcc_assert (REG_P (tmp_gpr));
7013 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7014 tmp_gpr, scalar_mode));
7015 return;
7016 }
7017
7018 else if (REG_P (src) || SUBREG_P (src))
7019 {
7020 int num_elements = GET_MODE_NUNITS (mode);
7021 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7022 int bit_shift = 7 - exact_log2 (num_elements);
7023 rtx element2;
7024 unsigned int dest_regno = reg_or_subregno (dest);
7025 unsigned int src_regno = reg_or_subregno (src);
7026 unsigned int element_regno = reg_or_subregno (element);
7027
7028 gcc_assert (REG_P (tmp_gpr));
7029
7030 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7031 a general purpose register. */
7032 if (TARGET_P9_VECTOR
7033 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7034 && INT_REGNO_P (dest_regno)
7035 && ALTIVEC_REGNO_P (src_regno)
7036 && INT_REGNO_P (element_regno))
7037 {
7038 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7039 rtx element_si = gen_rtx_REG (SImode, element_regno);
7040
7041 if (mode == V16QImode)
7042 emit_insn (BYTES_BIG_ENDIAN
7043 ? gen_vextublx (dest_si, element_si, src)
7044 : gen_vextubrx (dest_si, element_si, src));
7045
7046 else if (mode == V8HImode)
7047 {
7048 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7049 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7050 emit_insn (BYTES_BIG_ENDIAN
7051 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7052 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7053 }
7054
7055
7056 else
7057 {
7058 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7059 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7060 emit_insn (BYTES_BIG_ENDIAN
7061 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7062 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7063 }
7064
7065 return;
7066 }
7067
7068
7069 gcc_assert (REG_P (tmp_altivec));
7070
7071 /* For little endian, adjust element ordering.  For V2DI/V2DF we can use
7072 an XOR; otherwise we need to subtract.  The shift amount is chosen so
7073 that VSLO will shift the element into the upper position (adding 3
7074 converts a byte shift count into a bit shift count).  */
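/* Illustrative example (not in the original source): for a little endian
   V2DImode extract, the XOR with 1 below swaps indices 0 <-> 1; shifting
   the index left 6 then yields index * 64, i.e. the byte offset
   (index * 8) pre-shifted left 3 bits, which is the form the VSLO shift
   control expects.  */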
7075 if (scalar_size == 8)
7076 {
7077 if (!BYTES_BIG_ENDIAN)
7078 {
7079 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7080 element2 = tmp_gpr;
7081 }
7082 else
7083 element2 = element;
7084
7085 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7086 bit. */
7087 emit_insn (gen_rtx_SET (tmp_gpr,
7088 gen_rtx_AND (DImode,
7089 gen_rtx_ASHIFT (DImode,
7090 element2,
7091 GEN_INT (6)),
7092 GEN_INT (64))));
7093 }
7094 else
7095 {
7096 if (!BYTES_BIG_ENDIAN)
7097 {
7098 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7099
7100 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7101 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7102 element2 = tmp_gpr;
7103 }
7104 else
7105 element2 = element;
7106
7107 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7108 }
7109
7110 /* Get the value into the lower byte of the Altivec register where VSLO
7111 expects it. */
7112 if (TARGET_P9_VECTOR)
7113 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7114 else if (can_create_pseudo_p ())
7115 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7116 else
7117 {
7118 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7119 emit_move_insn (tmp_di, tmp_gpr);
7120 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7121 }
7122
7123 /* Do the VSLO to get the value into the final location. */
7124 switch (mode)
7125 {
7126 case E_V2DFmode:
7127 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7128 return;
7129
7130 case E_V2DImode:
7131 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7132 return;
7133
7134 case E_V4SFmode:
7135 {
7136 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7137 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7138 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7139 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7140 tmp_altivec));
7141
7142 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7143 return;
7144 }
7145
7146 case E_V4SImode:
7147 case E_V8HImode:
7148 case E_V16QImode:
7149 {
7150 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7151 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7152 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7153 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7154 tmp_altivec));
7155 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7156 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7157 GEN_INT (64 - bits_in_element)));
7158 return;
7159 }
7160
7161 default:
7162 gcc_unreachable ();
7163 }
7164
7165 return;
7166 }
7167 else
7168 gcc_unreachable ();
7169 }
7170
7171 /* Return the alignment of TYPE.  Existing alignment is ALIGN.  HOW
7172 selects whether the alignment is ABI-mandated, optional, or
7173 both ABI-mandated and optional alignment.  */
7174
7175 unsigned int
7176 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7177 {
7178 if (how != align_opt)
7179 {
7180 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7181 align = 128;
7182 }
7183
7184 if (how != align_abi)
7185 {
7186 if (TREE_CODE (type) == ARRAY_TYPE
7187 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7188 {
7189 if (align < BITS_PER_WORD)
7190 align = BITS_PER_WORD;
7191 }
7192 }
7193
7194 return align;
7195 }
7196
7197 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7198 instructions simply ignore the low bits; VSX memory instructions
7199 are aligned to 4 or 8 bytes. */
7200
7201 static bool
7202 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7203 {
7204 return (STRICT_ALIGNMENT
7205 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7206 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7207 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7208 && (int) align < VECTOR_ALIGN (mode)))));
7209 }
7210
7211 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7212
7213 bool
7214 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7215 {
7216 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7217 {
7218 if (computed != 128)
7219 {
7220 static bool warned;
7221 if (!warned && warn_psabi)
7222 {
7223 warned = true;
7224 inform (input_location,
7225 "the layout of aggregates containing vectors with"
7226 " %d-byte alignment has changed in GCC 5",
7227 computed / BITS_PER_UNIT);
7228 }
7229 }
7230 /* In current GCC there is no special case. */
7231 return false;
7232 }
7233
7234 return false;
7235 }
7236
7237 /* AIX increases natural record alignment to doubleword if the first
7238 field is an FP double while the FP fields remain word aligned. */
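/* Illustrative example (not in the original source):
   struct { double d; int i; } receives doubleword (64-bit) record
   alignment because its first field has DFmode; a first field of type
   double[4] is handled the same way, since array types are stripped
   below.  */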
7239
7240 unsigned int
7241 rs6000_special_round_type_align (tree type, unsigned int computed,
7242 unsigned int specified)
7243 {
7244 unsigned int align = MAX (computed, specified);
7245 tree field = TYPE_FIELDS (type);
7246
7247 /* Skip all non-FIELD_DECL nodes.  */
7248 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7249 field = DECL_CHAIN (field);
7250
7251 if (field != NULL && field != type)
7252 {
7253 type = TREE_TYPE (field);
7254 while (TREE_CODE (type) == ARRAY_TYPE)
7255 type = TREE_TYPE (type);
7256
7257 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7258 align = MAX (align, 64);
7259 }
7260
7261 return align;
7262 }
7263
7264 /* Darwin increases record alignment to the natural alignment of
7265 the first field. */
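/* Illustrative example (not in the original source):
   struct { struct { double d; } inner; char c; } is aligned to the
   natural alignment of double, because the loop below descends through
   leading aggregate fields until it reaches a non-aggregate type.  */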
7266
7267 unsigned int
7268 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7269 unsigned int specified)
7270 {
7271 unsigned int align = MAX (computed, specified);
7272
7273 if (TYPE_PACKED (type))
7274 return align;
7275
7276 /* Find the first field, looking down into aggregates. */
7277 do {
7278 tree field = TYPE_FIELDS (type);
7279 /* Skip all non-FIELD_DECL nodes.  */
7280 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7281 field = DECL_CHAIN (field);
7282 if (! field)
7283 break;
7284 /* A packed field does not contribute any extra alignment. */
7285 if (DECL_PACKED (field))
7286 return align;
7287 type = TREE_TYPE (field);
7288 while (TREE_CODE (type) == ARRAY_TYPE)
7289 type = TREE_TYPE (type);
7290 } while (AGGREGATE_TYPE_P (type));
7291
7292 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7293 align = MAX (align, TYPE_ALIGN (type));
7294
7295 return align;
7296 }
7297
7298 /* Return 1 for an operand in small memory on V.4/eabi. */
7299
7300 int
7301 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7302 machine_mode mode ATTRIBUTE_UNUSED)
7303 {
7304 #if TARGET_ELF
7305 rtx sym_ref;
7306
7307 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7308 return 0;
7309
7310 if (DEFAULT_ABI != ABI_V4)
7311 return 0;
7312
7313 if (SYMBOL_REF_P (op))
7314 sym_ref = op;
7315
7316 else if (GET_CODE (op) != CONST
7317 || GET_CODE (XEXP (op, 0)) != PLUS
7318 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7319 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7320 return 0;
7321
7322 else
7323 {
7324 rtx sum = XEXP (op, 0);
7325 HOST_WIDE_INT summand;
7326
7327 /* We have to be careful here, because it is the referenced address
7328 that must be within 32k of _SDA_BASE_, not just the symbol.  */
7329 summand = INTVAL (XEXP (sum, 1));
7330 if (summand < 0 || summand > g_switch_value)
7331 return 0;
7332
7333 sym_ref = XEXP (sum, 0);
7334 }
7335
7336 return SYMBOL_REF_SMALL_P (sym_ref);
7337 #else
7338 return 0;
7339 #endif
7340 }
7341
7342 /* Return true if either operand is a general purpose register. */
7343
7344 bool
7345 gpr_or_gpr_p (rtx op0, rtx op1)
7346 {
7347 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7348 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7349 }
7350
7351 /* Return true if this is a direct move operation between GPR registers and
7352 floating point/VSX registers. */
7353
7354 bool
7355 direct_move_p (rtx op0, rtx op1)
7356 {
7357 if (!REG_P (op0) || !REG_P (op1))
7358 return false;
7359
7360 if (!TARGET_DIRECT_MOVE)
7361 return false;
7362
7363 int regno0 = REGNO (op0);
7364 int regno1 = REGNO (op1);
7365 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7366 return false;
7367
7368 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7369 return true;
7370
7371 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7372 return true;
7373
7374 return false;
7375 }
7376
7377 /* Return true if ADDR is an acceptable address for a quad memory
7378 operation of mode MODE (either LQ/STQ for general purpose registers, or
7379 LXV/STXV for vector registers under ISA 3.0).  STRICT is true if the
7380 address must satisfy the strict (post-reload) base register rules;
7381 otherwise pre-reload pseudos are also accepted as base registers.  */
7382
7383 bool
7384 quad_address_p (rtx addr, machine_mode mode, bool strict)
7385 {
7386 rtx op0, op1;
7387
7388 if (GET_MODE_SIZE (mode) != 16)
7389 return false;
7390
7391 if (legitimate_indirect_address_p (addr, strict))
7392 return true;
7393
7394 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7395 return false;
7396
7397 if (GET_CODE (addr) != PLUS)
7398 return false;
7399
7400 op0 = XEXP (addr, 0);
7401 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7402 return false;
7403
7404 op1 = XEXP (addr, 1);
7405 if (!CONST_INT_P (op1))
7406 return false;
7407
7408 return quad_address_offset_p (INTVAL (op1));
7409 }
7410
7411 /* Return true if this is a load or store quad operation. This function does
7412 not handle the atomic quad memory instructions. */
7413
7414 bool
7415 quad_load_store_p (rtx op0, rtx op1)
7416 {
7417 bool ret;
7418
7419 if (!TARGET_QUAD_MEMORY)
7420 ret = false;
7421
7422 else if (REG_P (op0) && MEM_P (op1))
7423 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7424 && quad_memory_operand (op1, GET_MODE (op1))
7425 && !reg_overlap_mentioned_p (op0, op1));
7426
7427 else if (MEM_P (op0) && REG_P (op1))
7428 ret = (quad_memory_operand (op0, GET_MODE (op0))
7429 && quad_int_reg_operand (op1, GET_MODE (op1)));
7430
7431 else
7432 ret = false;
7433
7434 if (TARGET_DEBUG_ADDR)
7435 {
7436 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7437 ret ? "true" : "false");
7438 debug_rtx (gen_rtx_SET (op0, op1));
7439 }
7440
7441 return ret;
7442 }
7443
7444 /* Given an address, return a constant offset term if one exists. */
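/* Illustrative examples (not in the original source):
   (plus (reg) (const_int 16)) yields (const_int 16),
   (lo_sum (reg) (plus (symbol_ref) (const_int 8))) yields (const_int 8),
   and a plain (reg) yields NULL_RTX.  */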
7445
7446 static rtx
7447 address_offset (rtx op)
7448 {
7449 if (GET_CODE (op) == PRE_INC
7450 || GET_CODE (op) == PRE_DEC)
7451 op = XEXP (op, 0);
7452 else if (GET_CODE (op) == PRE_MODIFY
7453 || GET_CODE (op) == LO_SUM)
7454 op = XEXP (op, 1);
7455
7456 if (GET_CODE (op) == CONST)
7457 op = XEXP (op, 0);
7458
7459 if (GET_CODE (op) == PLUS)
7460 op = XEXP (op, 1);
7461
7462 if (CONST_INT_P (op))
7463 return op;
7464
7465 return NULL_RTX;
7466 }
7467
7468 /* Return true if the MEM operand is a memory operand suitable for use
7469 with a (full width, possibly multiple) gpr load/store. On
7470 powerpc64 this means the offset must be divisible by 4.
7471 Implements 'Y' constraint.
7472
7473 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7474 a constraint function we know the operand has satisfied a suitable
7475 memory predicate.
7476
7477 Offsetting a lo_sum should not be allowed, except where we know by
7478 alignment that a 32k boundary is not crossed. Note that by
7479 "offsetting" here we mean a further offset to access parts of the
7480 MEM. It's fine to have a lo_sum where the inner address is offset
7481 from a sym, since the same sym+offset will appear in the high part
7482 of the address calculation. */
7483
7484 bool
7485 mem_operand_gpr (rtx op, machine_mode mode)
7486 {
7487 unsigned HOST_WIDE_INT offset;
7488 int extra;
7489 rtx addr = XEXP (op, 0);
7490
7491 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7492 if (TARGET_UPDATE
7493 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7494 && mode_supports_pre_incdec_p (mode)
7495 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7496 return true;
7497
7498 /* Don't allow non-offsettable addresses. See PRs 83969 and 84279. */
7499 if (!rs6000_offsettable_memref_p (op, mode, false))
7500 return false;
7501
7502 op = address_offset (addr);
7503 if (op == NULL_RTX)
7504 return true;
7505
7506 offset = INTVAL (op);
7507 if (TARGET_POWERPC64 && (offset & 3) != 0)
7508 return false;
7509
7510 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7511 if (extra < 0)
7512 extra = 0;
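/* Illustrative example (not in the original source): DImode with 64-bit
   registers gives extra = 0, so any multiple-of-4 offset in
   [-0x8000, 0x7ffc] passes the range check below; DImode with 32-bit
   registers gives extra = 4, lowering the upper bound so the second word
   at offset + 4 stays addressable.  */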
7513
7514 if (GET_CODE (addr) == LO_SUM)
7515 /* For lo_sum addresses, we must allow any offset except one that
7516 causes a wrap, so test only the low 16 bits. */
7517 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7518
7519 return offset + 0x8000 < 0x10000u - extra;
7520 }
7521
7522 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7523 enforce an offset divisible by 4 even for 32-bit. */
7524
7525 bool
7526 mem_operand_ds_form (rtx op, machine_mode mode)
7527 {
7528 unsigned HOST_WIDE_INT offset;
7529 int extra;
7530 rtx addr = XEXP (op, 0);
7531
7532 if (!offsettable_address_p (false, mode, addr))
7533 return false;
7534
7535 op = address_offset (addr);
7536 if (op == NULL_RTX)
7537 return true;
7538
7539 offset = INTVAL (op);
7540 if ((offset & 3) != 0)
7541 return false;
7542
7543 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7544 if (extra < 0)
7545 extra = 0;
7546
7547 if (GET_CODE (addr) == LO_SUM)
7548 /* For lo_sum addresses, we must allow any offset except one that
7549 causes a wrap, so test only the low 16 bits. */
7550 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7551
7552 return offset + 0x8000 < 0x10000u - extra;
7553 }
7554 \f
7555 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7556
7557 static bool
7558 reg_offset_addressing_ok_p (machine_mode mode)
7559 {
7560 switch (mode)
7561 {
7562 case E_V16QImode:
7563 case E_V8HImode:
7564 case E_V4SFmode:
7565 case E_V4SImode:
7566 case E_V2DFmode:
7567 case E_V2DImode:
7568 case E_V1TImode:
7569 case E_TImode:
7570 case E_TFmode:
7571 case E_KFmode:
7572 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7573 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7574 a vector mode, if we want to use the VSX registers to move it around,
7575 we need to restrict ourselves to reg+reg addressing. Similarly for
7576 IEEE 128-bit floating point that is passed in a single vector
7577 register. */
7578 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7579 return mode_supports_dq_form (mode);
7580 break;
7581
7582 case E_SDmode:
7583 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7584 addressing for the LFIWZX and STFIWX instructions. */
7585 if (TARGET_NO_SDMODE_STACK)
7586 return false;
7587 break;
7588
7589 default:
7590 break;
7591 }
7592
7593 return true;
7594 }
7595
7596 static bool
7597 virtual_stack_registers_memory_p (rtx op)
7598 {
7599 int regnum;
7600
7601 if (REG_P (op))
7602 regnum = REGNO (op);
7603
7604 else if (GET_CODE (op) == PLUS
7605 && REG_P (XEXP (op, 0))
7606 && CONST_INT_P (XEXP (op, 1)))
7607 regnum = REGNO (XEXP (op, 0));
7608
7609 else
7610 return false;
7611
7612 return (regnum >= FIRST_VIRTUAL_REGISTER
7613 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7614 }
7615
7616 /* Return true if a MODE sized memory access to OP plus OFFSET
7617 is known to not straddle a 32k boundary. This function is used
7618 to determine whether -mcmodel=medium code can use TOC pointer
7619 relative addressing for OP. This means the alignment of the TOC
7620 pointer must also be taken into account, and unfortunately that is
7621 only 8 bytes. */
7622
7623 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7624 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7625 #endif
7626
7627 static bool
7628 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7629 machine_mode mode)
7630 {
7631 tree decl;
7632 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7633
7634 if (!SYMBOL_REF_P (op))
7635 return false;
7636
7637 /* ISA 3.0 vector d-form addressing is restricted; don't allow
7638 SYMBOL_REF.  */
7639 if (mode_supports_dq_form (mode))
7640 return false;
7641
7642 dsize = GET_MODE_SIZE (mode);
7643 decl = SYMBOL_REF_DECL (op);
7644 if (!decl)
7645 {
7646 if (dsize == 0)
7647 return false;
7648
7649 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7650 replacing memory addresses with an anchor plus offset. We
7651 could find the decl by rummaging around in the block->objects
7652 VEC for the given offset but that seems like too much work. */
7653 dalign = BITS_PER_UNIT;
7654 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7655 && SYMBOL_REF_ANCHOR_P (op)
7656 && SYMBOL_REF_BLOCK (op) != NULL)
7657 {
7658 struct object_block *block = SYMBOL_REF_BLOCK (op);
7659
7660 dalign = block->alignment;
7661 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7662 }
7663 else if (CONSTANT_POOL_ADDRESS_P (op))
7664 {
7665 /* It would be nice to have a get_pool_align ()...  */
7666 machine_mode cmode = get_pool_mode (op);
7667
7668 dalign = GET_MODE_ALIGNMENT (cmode);
7669 }
7670 }
7671 else if (DECL_P (decl))
7672 {
7673 dalign = DECL_ALIGN (decl);
7674
7675 if (dsize == 0)
7676 {
7677 /* Allow BLKmode when the entire object is known to not
7678 cross a 32k boundary. */
7679 if (!DECL_SIZE_UNIT (decl))
7680 return false;
7681
7682 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7683 return false;
7684
7685 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7686 if (dsize > 32768)
7687 return false;
7688
7689 dalign /= BITS_PER_UNIT;
7690 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7691 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7692 return dalign >= dsize;
7693 }
7694 }
7695 else
7696 gcc_unreachable ();
7697
7698 /* Find how many bits of the alignment we know for this access. */
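/* Illustrative example (not in the original source): with dalign = 8
   bytes and offset = 12, lsb = 12 & -12 = 4 and mask = 7 & 3 = 3, so
   dalign becomes 4: only 4-byte alignment is known for the access, and
   an 8-byte access fails the check below while a 4-byte access passes.  */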
7699 dalign /= BITS_PER_UNIT;
7700 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7701 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7702 mask = dalign - 1;
7703 lsb = offset & -offset;
7704 mask &= lsb - 1;
7705 dalign = mask + 1;
7706
7707 return dalign >= dsize;
7708 }
7709
7710 static bool
7711 constant_pool_expr_p (rtx op)
7712 {
7713 rtx base, offset;
7714
7715 split_const (op, &base, &offset);
7716 return (SYMBOL_REF_P (base)
7717 && CONSTANT_POOL_ADDRESS_P (base)
7718 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7719 }
7720
7721 /* These are only used to pass through from print_operand/print_operand_address
7722 to rs6000_output_addr_const_extra over the intervening function
7723 output_addr_const which is not target code. */
7724 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7725
7726 /* Return true if OP is a toc pointer relative address (the output
7727 of create_TOC_reference). If STRICT, do not match non-split
7728 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7729 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7730 TOCREL_OFFSET_RET respectively. */
7731
7732 bool
7733 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7734 const_rtx *tocrel_offset_ret)
7735 {
7736 if (!TARGET_TOC)
7737 return false;
7738
7739 if (TARGET_CMODEL != CMODEL_SMALL)
7740 {
7741 /* When strict, ensure we have everything tidy.  */
7742 if (strict
7743 && !(GET_CODE (op) == LO_SUM
7744 && REG_P (XEXP (op, 0))
7745 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7746 return false;
7747
7748 /* When not strict, allow non-split TOC addresses and also allow
7749 (lo_sum (high ..)) TOC addresses created during reload. */
7750 if (GET_CODE (op) == LO_SUM)
7751 op = XEXP (op, 1);
7752 }
7753
7754 const_rtx tocrel_base = op;
7755 const_rtx tocrel_offset = const0_rtx;
7756
7757 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7758 {
7759 tocrel_base = XEXP (op, 0);
7760 tocrel_offset = XEXP (op, 1);
7761 }
7762
7763 if (tocrel_base_ret)
7764 *tocrel_base_ret = tocrel_base;
7765 if (tocrel_offset_ret)
7766 *tocrel_offset_ret = tocrel_offset;
7767
7768 return (GET_CODE (tocrel_base) == UNSPEC
7769 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7770 && REG_P (XVECEXP (tocrel_base, 0, 1))
7771 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7772 }
7773
7774 /* Return true if X is a constant pool address, and also for cmodel=medium
7775 if X is a toc-relative address known to be offsettable within MODE. */
7776
7777 bool
7778 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7779 bool strict)
7780 {
7781 const_rtx tocrel_base, tocrel_offset;
7782 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7783 && (TARGET_CMODEL != CMODEL_MEDIUM
7784 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7785 || mode == QImode
7786 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7787 INTVAL (tocrel_offset), mode)));
7788 }
7789
7790 static bool
7791 legitimate_small_data_p (machine_mode mode, rtx x)
7792 {
7793 return (DEFAULT_ABI == ABI_V4
7794 && !flag_pic && !TARGET_TOC
7795 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7796 && small_data_operand (x, mode));
7797 }
7798
7799 bool
7800 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7801 bool strict, bool worst_case)
7802 {
7803 unsigned HOST_WIDE_INT offset;
7804 unsigned int extra;
7805
7806 if (GET_CODE (x) != PLUS)
7807 return false;
7808 if (!REG_P (XEXP (x, 0)))
7809 return false;
7810 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7811 return false;
7812 if (mode_supports_dq_form (mode))
7813 return quad_address_p (x, mode, strict);
7814 if (!reg_offset_addressing_ok_p (mode))
7815 return virtual_stack_registers_memory_p (x);
7816 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7817 return true;
7818 if (!CONST_INT_P (XEXP (x, 1)))
7819 return false;
7820
7821 offset = INTVAL (XEXP (x, 1));
7822 extra = 0;
7823 switch (mode)
7824 {
7825 case E_DFmode:
7826 case E_DDmode:
7827 case E_DImode:
7828 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7829 addressing. */
7830 if (VECTOR_MEM_VSX_P (mode))
7831 return false;
7832
7833 if (!worst_case)
7834 break;
7835 if (!TARGET_POWERPC64)
7836 extra = 4;
7837 else if (offset & 3)
7838 return false;
7839 break;
7840
7841 case E_TFmode:
7842 case E_IFmode:
7843 case E_KFmode:
7844 case E_TDmode:
7845 case E_TImode:
7846 case E_PTImode:
7847 extra = 8;
7848 if (!worst_case)
7849 break;
7850 if (!TARGET_POWERPC64)
7851 extra = 12;
7852 else if (offset & 3)
7853 return false;
7854 break;
7855
7856 default:
7857 break;
7858 }
7859
7860 offset += 0x8000;
7861 return offset < 0x10000 - extra;
7862 }
7863
7864 bool
7865 legitimate_indexed_address_p (rtx x, int strict)
7866 {
7867 rtx op0, op1;
7868
7869 if (GET_CODE (x) != PLUS)
7870 return false;
7871
7872 op0 = XEXP (x, 0);
7873 op1 = XEXP (x, 1);
7874
7875 return (REG_P (op0) && REG_P (op1)
7876 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7877 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7878 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7879 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7880 }
7881
7882 bool
7883 avoiding_indexed_address_p (machine_mode mode)
7884 {
7885 /* Avoid indexed addressing for modes that have non-indexed
7886 load/store instruction forms. */
7887 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7888 }
7889
7890 bool
7891 legitimate_indirect_address_p (rtx x, int strict)
7892 {
7893 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
7894 }
7895
7896 bool
7897 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7898 {
7899 if (!TARGET_MACHO || !flag_pic
7900 || mode != SImode || !MEM_P (x))
7901 return false;
7902 x = XEXP (x, 0);
7903
7904 if (GET_CODE (x) != LO_SUM)
7905 return false;
7906 if (!REG_P (XEXP (x, 0)))
7907 return false;
7908 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7909 return false;
7910 x = XEXP (x, 1);
7911
7912 return CONSTANT_P (x);
7913 }
7914
7915 static bool
7916 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
7917 {
7918 if (GET_CODE (x) != LO_SUM)
7919 return false;
7920 if (!REG_P (XEXP (x, 0)))
7921 return false;
7922 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7923 return false;
7924 /* Quad word addresses are restricted, and we can't use LO_SUM.  */
7925 if (mode_supports_dq_form (mode))
7926 return false;
7927 x = XEXP (x, 1);
7928
7929 if (TARGET_ELF || TARGET_MACHO)
7930 {
7931 bool large_toc_ok;
7932
7933 if (DEFAULT_ABI == ABI_V4 && flag_pic)
7934 return false;
7935 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
7936 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
7937 recognizes some LO_SUM addresses as valid although this
7938 function says the opposite.  In most cases LRA can generate
7939 correct code for address reloads through its various
7940 transformations; it fails to manage only some LO_SUM cases.  So we
7941 need to add code here saying that those addresses are still valid.  */
7942 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
7943 && small_toc_ref (x, VOIDmode));
7944 if (TARGET_TOC && ! large_toc_ok)
7945 return false;
7946 if (GET_MODE_NUNITS (mode) != 1)
7947 return false;
7948 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7949 && !(/* ??? Assume floating point reg based on mode? */
7950 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
7951 return false;
7952
7953 return CONSTANT_P (x) || large_toc_ok;
7954 }
7955
7956 return false;
7957 }
7958
7959
7960 /* Try machine-dependent ways of modifying an illegitimate address
7961 to be legitimate. If we find one, return the new, valid address.
7962 This is used from only one place: `memory_address' in explow.c.
7963
7964 OLDX is the address as it was before break_out_memory_refs was
7965 called. In some cases it is useful to look at this to decide what
7966 needs to be done.
7967
7968 It is always safe for this function to do nothing. It exists to
7969 recognize opportunities to optimize the output.
7970
7971 On RS/6000, first check for the sum of a register with a constant
7972 integer that is out of range. If so, generate code to add the
7973 constant with the low-order 16 bits masked to the register and force
7974 this result into another register (this can be done with `cau').
7975 Then generate an address of REG+(CONST&0xffff), allowing for the
7976 possibility of bit 16 being a one.
7977
7978 Then check for the sum of a register and something not constant; try to
7979 load the non-constant part into a register and return the sum.  */
7980
7981 static rtx
7982 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
7983 machine_mode mode)
7984 {
7985 unsigned int extra;
7986
7987 if (!reg_offset_addressing_ok_p (mode)
7988 || mode_supports_dq_form (mode))
7989 {
7990 if (virtual_stack_registers_memory_p (x))
7991 return x;
7992
7993 /* In theory we should not be seeing addresses of the form reg+0,
7994 but just in case it is generated, optimize it away. */
7995 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
7996 return force_reg (Pmode, XEXP (x, 0));
7997
7998 /* For TImode with load/store quad, restrict addresses to just a single
7999 pointer, so it works with both GPRs and VSX registers. */
8000 /* Make sure both operands are registers. */
8001 else if (GET_CODE (x) == PLUS
8002 && (mode != TImode || !TARGET_VSX))
8003 return gen_rtx_PLUS (Pmode,
8004 force_reg (Pmode, XEXP (x, 0)),
8005 force_reg (Pmode, XEXP (x, 1)));
8006 else
8007 return force_reg (Pmode, x);
8008 }
8009 if (SYMBOL_REF_P (x))
8010 {
8011 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8012 if (model != 0)
8013 return rs6000_legitimize_tls_address (x, model);
8014 }
8015
8016 extra = 0;
8017 switch (mode)
8018 {
8019 case E_TFmode:
8020 case E_TDmode:
8021 case E_TImode:
8022 case E_PTImode:
8023 case E_IFmode:
8024 case E_KFmode:
8025 /* As in legitimate_offset_address_p we do not assume
8026 worst-case. The mode here is just a hint as to the registers
8027 used. A TImode is usually in gprs, but may actually be in
8028 fprs. Leave worst-case scenario for reload to handle via
8029 insn constraints. PTImode is only GPRs. */
8030 extra = 8;
8031 break;
8032 default:
8033 break;
8034 }
8035
8036 if (GET_CODE (x) == PLUS
8037 && REG_P (XEXP (x, 0))
8038 && CONST_INT_P (XEXP (x, 1))
8039 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8040 >= 0x10000 - extra))
8041 {
8042 HOST_WIDE_INT high_int, low_int;
8043 rtx sum;
8044 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
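/* Illustrative example (not in the original source): the line above
   sign-extends the low 16 bits, so for reg + 0x12345 we get
   low_int = 0x2345 and high_int = 0x10000; the high part is added into a
   register and the low part survives as a valid 16-bit displacement.  */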
8045 if (low_int >= 0x8000 - extra)
8046 low_int = 0;
8047 high_int = INTVAL (XEXP (x, 1)) - low_int;
8048 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8049 GEN_INT (high_int)), 0);
8050 return plus_constant (Pmode, sum, low_int);
8051 }
8052 else if (GET_CODE (x) == PLUS
8053 && REG_P (XEXP (x, 0))
8054 && !CONST_INT_P (XEXP (x, 1))
8055 && GET_MODE_NUNITS (mode) == 1
8056 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8057 || (/* ??? Assume floating point reg based on mode? */
8058 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8059 && !avoiding_indexed_address_p (mode))
8060 {
8061 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8062 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8063 }
8064 else if ((TARGET_ELF
8065 #if TARGET_MACHO
8066 || !MACHO_DYNAMIC_NO_PIC_P
8067 #endif
8068 )
8069 && TARGET_32BIT
8070 && TARGET_NO_TOC
8071 && !flag_pic
8072 && !CONST_INT_P (x)
8073 && !CONST_WIDE_INT_P (x)
8074 && !CONST_DOUBLE_P (x)
8075 && CONSTANT_P (x)
8076 && GET_MODE_NUNITS (mode) == 1
8077 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8078 || (/* ??? Assume floating point reg based on mode? */
8079 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8080 {
8081 rtx reg = gen_reg_rtx (Pmode);
8082 if (TARGET_ELF)
8083 emit_insn (gen_elf_high (reg, x));
8084 else
8085 emit_insn (gen_macho_high (reg, x));
8086 return gen_rtx_LO_SUM (Pmode, reg, x);
8087 }
8088 else if (TARGET_TOC
8089 && SYMBOL_REF_P (x)
8090 && constant_pool_expr_p (x)
8091 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8092 return create_TOC_reference (x, NULL_RTX);
8093 else
8094 return x;
8095 }
8096
8097 /* Debug version of rs6000_legitimize_address. */
8098 static rtx
8099 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8100 {
8101 rtx ret;
8102 rtx_insn *insns;
8103
8104 start_sequence ();
8105 ret = rs6000_legitimize_address (x, oldx, mode);
8106 insns = get_insns ();
8107 end_sequence ();
8108
8109 if (ret != x)
8110 {
8111 fprintf (stderr,
8112 "\nrs6000_legitimize_address: mode %s, old code %s, "
8113 "new code %s, modified\n",
8114 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8115 GET_RTX_NAME (GET_CODE (ret)));
8116
8117 fprintf (stderr, "Original address:\n");
8118 debug_rtx (x);
8119
8120 fprintf (stderr, "oldx:\n");
8121 debug_rtx (oldx);
8122
8123 fprintf (stderr, "New address:\n");
8124 debug_rtx (ret);
8125
8126 if (insns)
8127 {
8128 fprintf (stderr, "Insns added:\n");
8129 debug_rtx_list (insns, 20);
8130 }
8131 }
8132 else
8133 {
8134 fprintf (stderr,
8135 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8136 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8137
8138 debug_rtx (x);
8139 }
8140
8141 if (insns)
8142 emit_insn (insns);
8143
8144 return ret;
8145 }
8146
8147 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8148 We need to emit DTP-relative relocations. */
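/* For example (illustrative, not in the original source), a 4-byte
   DTP-relative reference to an ELF symbol "foo" is emitted as
	.long	foo@dtprel+0x8000
   while on XCOFF a @le, @ie or @m suffix is appended according to the
   TLS model of the symbol.  */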
8149
8150 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8151 static void
8152 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8153 {
8154 switch (size)
8155 {
8156 case 4:
8157 fputs ("\t.long\t", file);
8158 break;
8159 case 8:
8160 fputs (DOUBLE_INT_ASM_OP, file);
8161 break;
8162 default:
8163 gcc_unreachable ();
8164 }
8165 output_addr_const (file, x);
8166 if (TARGET_ELF)
8167 fputs ("@dtprel+0x8000", file);
8168 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8169 {
8170 switch (SYMBOL_REF_TLS_MODEL (x))
8171 {
8172 case 0:
8173 break;
8174 case TLS_MODEL_LOCAL_EXEC:
8175 fputs ("@le", file);
8176 break;
8177 case TLS_MODEL_INITIAL_EXEC:
8178 fputs ("@ie", file);
8179 break;
8180 case TLS_MODEL_GLOBAL_DYNAMIC:
8181 case TLS_MODEL_LOCAL_DYNAMIC:
8182 fputs ("@m", file);
8183 break;
8184 default:
8185 gcc_unreachable ();
8186 }
8187 }
8188 }
8189
8190 /* Return true if X is a symbol that refers to real (rather than emulated)
8191 TLS. */
8192
8193 static bool
8194 rs6000_real_tls_symbol_ref_p (rtx x)
8195 {
8196 return (SYMBOL_REF_P (x)
8197 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8198 }
8199
8200 /* In the name of slightly smaller debug output, and to cater to
8201 general assembler lossage, recognize various UNSPEC sequences
8202 and turn them back into a direct symbol reference. */
8203
8204 static rtx
8205 rs6000_delegitimize_address (rtx orig_x)
8206 {
8207 rtx x, y, offset;
8208
8209 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8210 orig_x = XVECEXP (orig_x, 0, 0);
8211
8212 orig_x = delegitimize_mem_from_attrs (orig_x);
8213
8214 x = orig_x;
8215 if (MEM_P (x))
8216 x = XEXP (x, 0);
8217
8218 y = x;
8219 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8220 y = XEXP (y, 1);
8221
8222 offset = NULL_RTX;
8223 if (GET_CODE (y) == PLUS
8224 && GET_MODE (y) == Pmode
8225 && CONST_INT_P (XEXP (y, 1)))
8226 {
8227 offset = XEXP (y, 1);
8228 y = XEXP (y, 0);
8229 }
8230
8231 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8232 {
8233 y = XVECEXP (y, 0, 0);
8234
8235 #ifdef HAVE_AS_TLS
8236 /* Do not associate thread-local symbols with the original
8237 constant pool symbol. */
8238 if (TARGET_XCOFF
8239 && SYMBOL_REF_P (y)
8240 && CONSTANT_POOL_ADDRESS_P (y)
8241 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8242 return orig_x;
8243 #endif
8244
8245 if (offset != NULL_RTX)
8246 y = gen_rtx_PLUS (Pmode, y, offset);
8247 if (!MEM_P (orig_x))
8248 return y;
8249 else
8250 return replace_equiv_address_nv (orig_x, y);
8251 }
8252
8253 if (TARGET_MACHO
8254 && GET_CODE (orig_x) == LO_SUM
8255 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8256 {
8257 y = XEXP (XEXP (orig_x, 1), 0);
8258 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8259 return XVECEXP (y, 0, 0);
8260 }
8261
8262 return orig_x;
8263 }
8264
8265 /* Return true if X shouldn't be emitted into the debug info.
8266 The linker doesn't like .toc section references from
8267 .debug_* sections, so reject .toc section symbols. */
8268
8269 static bool
8270 rs6000_const_not_ok_for_debug_p (rtx x)
8271 {
8272 if (GET_CODE (x) == UNSPEC)
8273 return true;
8274 if (SYMBOL_REF_P (x)
8275 && CONSTANT_POOL_ADDRESS_P (x))
8276 {
8277 rtx c = get_pool_constant (x);
8278 machine_mode cmode = get_pool_mode (x);
8279 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8280 return true;
8281 }
8282
8283 return false;
8284 }
8285
8286 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8287
8288 static bool
8289 rs6000_legitimate_combined_insn (rtx_insn *insn)
8290 {
8291 int icode = INSN_CODE (insn);
8292
8293 /* Reject creating doloop insns. Combine should not be allowed
8294 to create these for a number of reasons:
8295 1) In a nested loop, if combine creates one of these in an
8296 outer loop and the register allocator happens to allocate ctr
8297 to the outer loop insn, then the inner loop can't use ctr.
8298 Inner loops ought to be more highly optimized.
8299 2) Combine often wants to create one of these from what was
8300 originally a three insn sequence, first combining the three
8301 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8302 allocated ctr, the splitter takes us back to the three insn
8303 sequence. It's better to stop combine at the two insn
8304 sequence.
8305 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8306 insns, the register allocator sometimes uses floating point
8307 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8308 jump insn and output reloads are not implemented for jumps,
8309 the ctrsi/ctrdi splitters need to handle all possible cases.
8310 That's a pain, and it gets to be seriously difficult when a
8311 splitter that runs after reload needs memory to transfer from
8312 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8313 for the difficult case. It's better to not create problems
8314 in the first place. */
8315 if (icode != CODE_FOR_nothing
8316 && (icode == CODE_FOR_bdz_si
8317 || icode == CODE_FOR_bdz_di
8318 || icode == CODE_FOR_bdnz_si
8319 || icode == CODE_FOR_bdnz_di
8320 || icode == CODE_FOR_bdztf_si
8321 || icode == CODE_FOR_bdztf_di
8322 || icode == CODE_FOR_bdnztf_si
8323 || icode == CODE_FOR_bdnztf_di))
8324 return false;
8325
8326 return true;
8327 }
8328
8329 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8330
8331 static GTY(()) rtx rs6000_tls_symbol;
8332 static rtx
8333 rs6000_tls_get_addr (void)
8334 {
8335 if (!rs6000_tls_symbol)
8336 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8337
8338 return rs6000_tls_symbol;
8339 }
8340
8341 /* Construct the SYMBOL_REF for TLS GOT references. */
8342
8343 static GTY(()) rtx rs6000_got_symbol;
8344 rtx
8345 rs6000_got_sym (void)
8346 {
8347 if (!rs6000_got_symbol)
8348 {
8349 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8350 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8351 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8352 }
8353
8354 return rs6000_got_symbol;
8355 }
8356
8357 /* AIX Thread-Local Address support. */
8358
8359 static rtx
8360 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8361 {
8362 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8363 const char *name;
8364 char *tlsname;
8365
8366 name = XSTR (addr, 0);
8367 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
8368 or will be placed in the TLS private data section.  */
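/* Illustrative example (not in the original source): a public symbol
   "foo" becomes "foo[TL]" and a BSS-initialized one becomes "foo[UL]",
   while a name that already ends in ']' is left untouched.  */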
8369 if (name[strlen (name) - 1] != ']'
8370 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8371 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8372 {
8373 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8374 strcpy (tlsname, name);
8375 strcat (tlsname,
8376 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8377 tlsaddr = copy_rtx (addr);
8378 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8379 }
8380 else
8381 tlsaddr = addr;
8382
8383 /* Place addr into TOC constant pool. */
8384 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8385
8386 /* Output the TOC entry and create the MEM referencing the value. */
8387 if (constant_pool_expr_p (XEXP (sym, 0))
8388 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8389 {
8390 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8391 mem = gen_const_mem (Pmode, tocref);
8392 set_mem_alias_set (mem, get_TOC_alias_set ());
8393 }
8394 else
8395 return sym;
8396
8397 /* Use global-dynamic for local-dynamic. */
8398 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8399 || model == TLS_MODEL_LOCAL_DYNAMIC)
8400 {
8401 /* Create new TOC reference for @m symbol. */
8402 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8403 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8404 strcpy (tlsname, "*LCM");
8405 strcat (tlsname, name + 3);
8406 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8407 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8408 tocref = create_TOC_reference (modaddr, NULL_RTX);
8409 rtx modmem = gen_const_mem (Pmode, tocref);
8410 set_mem_alias_set (modmem, get_TOC_alias_set ());
8411
8412 rtx modreg = gen_reg_rtx (Pmode);
8413 emit_insn (gen_rtx_SET (modreg, modmem));
8414
8415 tmpreg = gen_reg_rtx (Pmode);
8416 emit_insn (gen_rtx_SET (tmpreg, mem));
8417
8418 dest = gen_reg_rtx (Pmode);
8419 if (TARGET_32BIT)
8420 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8421 else
8422 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8423 return dest;
8424 }
8425 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8426 else if (TARGET_32BIT)
8427 {
8428 tlsreg = gen_reg_rtx (SImode);
8429 emit_insn (gen_tls_get_tpointer (tlsreg));
8430 }
8431 else
8432 tlsreg = gen_rtx_REG (DImode, 13);
8433
8434 /* Load the TOC value into temporary register. */
8435 tmpreg = gen_reg_rtx (Pmode);
8436 emit_insn (gen_rtx_SET (tmpreg, mem));
8437 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8438 gen_rtx_MINUS (Pmode, addr, tlsreg));
8439
8440 /* Add TOC symbol value to TLS pointer. */
8441 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8442
8443 return dest;
8444 }
8445
8446 /* Output arg setup instructions for a !TARGET_TLS_MARKERS
8447 __tls_get_addr call. */
8448
8449 void
8450 rs6000_output_tlsargs (rtx *operands)
8451 {
8452 /* Set up operands for output_asm_insn, without modifying OPERANDS. */
8453 rtx op[3];
8454
8455 /* The set dest of the call, i.e. r3, which is also the first arg reg.  */
8456 op[0] = operands[0];
8457 /* The TLS symbol from global_tlsarg stashed as CALL operand 2. */
8458 op[1] = XVECEXP (operands[2], 0, 0);
8459 if (XINT (operands[2], 1) == UNSPEC_TLSGD)
8460 {
8461 /* The GOT register. */
8462 op[2] = XVECEXP (operands[2], 0, 1);
8463 if (TARGET_CMODEL != CMODEL_SMALL)
8464 output_asm_insn ("addis %0,%2,%1@got@tlsgd@ha\n\t"
8465 "addi %0,%0,%1@got@tlsgd@l", op);
8466 else
8467 output_asm_insn ("addi %0,%2,%1@got@tlsgd", op);
8468 }
8469 else if (XINT (operands[2], 1) == UNSPEC_TLSLD)
8470 {
8471 if (TARGET_CMODEL != CMODEL_SMALL)
8472 output_asm_insn ("addis %0,%1,%&@got@tlsld@ha\n\t"
8473 "addi %0,%0,%&@got@tlsld@l", op);
8474 else
8475 output_asm_insn ("addi %0,%1,%&@got@tlsld", op);
8476 }
8477 else
8478 gcc_unreachable ();
8479 }
8480
8481 /* Passes the TLS argument value from the global-dynamic and local-dynamic
8482 emit_library_call_value calls in rs6000_legitimize_tls_address through
8483 to rs6000_call_aix and rs6000_call_sysv.  This is used to emit the
8484 marker relocs put on __tls_get_addr calls.  */
8485 static rtx global_tlsarg;
8486
8487 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8488 this (thread-local) address. */
8489
8490 static rtx
8491 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8492 {
8493 rtx dest, insn;
8494
8495 if (TARGET_XCOFF)
8496 return rs6000_legitimize_tls_address_aix (addr, model);
8497
8498 dest = gen_reg_rtx (Pmode);
8499 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
8500 {
8501 rtx tlsreg;
8502
8503 if (TARGET_64BIT)
8504 {
8505 tlsreg = gen_rtx_REG (Pmode, 13);
8506 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8507 }
8508 else
8509 {
8510 tlsreg = gen_rtx_REG (Pmode, 2);
8511 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8512 }
8513 emit_insn (insn);
8514 }
8515 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8516 {
8517 rtx tlsreg, tmp;
8518
8519 tmp = gen_reg_rtx (Pmode);
8520 if (TARGET_64BIT)
8521 {
8522 tlsreg = gen_rtx_REG (Pmode, 13);
8523 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8524 }
8525 else
8526 {
8527 tlsreg = gen_rtx_REG (Pmode, 2);
8528 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8529 }
8530 emit_insn (insn);
8531 if (TARGET_64BIT)
8532 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8533 else
8534 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8535 emit_insn (insn);
8536 }
8537 else
8538 {
8539 rtx got, tga, tmp1, tmp2;
8540
8541 /* We currently use relocations like @got@tlsgd for tls, which
8542 means the linker will handle allocation of tls entries, placing
8543 them in the .got section. So use a pointer to the .got section,
8544 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8545 or to secondary GOT sections used by 32-bit -fPIC. */
8546 if (TARGET_64BIT)
8547 got = gen_rtx_REG (Pmode, 2);
8548 else
8549 {
8550 if (flag_pic == 1)
8551 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8552 else
8553 {
8554 rtx gsym = rs6000_got_sym ();
8555 got = gen_reg_rtx (Pmode);
8556 if (flag_pic == 0)
8557 rs6000_emit_move (got, gsym, Pmode);
8558 else
8559 {
8560 rtx mem, lab;
8561
8562 tmp1 = gen_reg_rtx (Pmode);
8563 tmp2 = gen_reg_rtx (Pmode);
8564 mem = gen_const_mem (Pmode, tmp1);
8565 lab = gen_label_rtx ();
8566 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8567 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8568 if (TARGET_LINK_STACK)
8569 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8570 emit_move_insn (tmp2, mem);
8571 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8572 set_unique_reg_note (last, REG_EQUAL, gsym);
8573 }
8574 }
8575 }
8576
8577 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8578 {
8579 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8580 UNSPEC_TLSGD);
8581 tga = rs6000_tls_get_addr ();
8582 global_tlsarg = arg;
8583 if (TARGET_TLS_MARKERS)
8584 {
8585 rtx argreg = gen_rtx_REG (Pmode, 3);
8586 emit_insn (gen_rtx_SET (argreg, arg));
8587 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
8588 argreg, Pmode);
8589 }
8590 else
8591 emit_library_call_value (tga, dest, LCT_CONST, Pmode);
8592 global_tlsarg = NULL_RTX;
8593
8594 /* Make a note so that the result of this call can be CSEd. */
8595 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8596 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8597 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8598 }
8599 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8600 {
8601 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8602 tga = rs6000_tls_get_addr ();
8603 tmp1 = gen_reg_rtx (Pmode);
8604 global_tlsarg = arg;
8605 if (TARGET_TLS_MARKERS)
8606 {
8607 rtx argreg = gen_rtx_REG (Pmode, 3);
8608 emit_insn (gen_rtx_SET (argreg, arg));
8609 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
8610 argreg, Pmode);
8611 }
8612 else
8613 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode);
8614 global_tlsarg = NULL_RTX;
8615
8616 /* Make a note so that the result of this call can be CSEd. */
8617 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8618 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8619 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8620
8621 if (rs6000_tls_size == 16)
8622 {
8623 if (TARGET_64BIT)
8624 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8625 else
8626 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8627 }
8628 else if (rs6000_tls_size == 32)
8629 {
8630 tmp2 = gen_reg_rtx (Pmode);
8631 if (TARGET_64BIT)
8632 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8633 else
8634 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8635 emit_insn (insn);
8636 if (TARGET_64BIT)
8637 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8638 else
8639 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8640 }
8641 else
8642 {
8643 tmp2 = gen_reg_rtx (Pmode);
8644 if (TARGET_64BIT)
8645 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8646 else
8647 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8648 emit_insn (insn);
8649 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8650 }
8651 emit_insn (insn);
8652 }
8653 else
8654 {
8655 /* IE, or 64-bit offset LE. */
8656 tmp2 = gen_reg_rtx (Pmode);
8657 if (TARGET_64BIT)
8658 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8659 else
8660 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8661 emit_insn (insn);
8662 if (TARGET_64BIT)
8663 insn = gen_tls_tls_64 (dest, tmp2, addr);
8664 else
8665 insn = gen_tls_tls_32 (dest, tmp2, addr);
8666 emit_insn (insn);
8667 }
8668 }
8669
8670 return dest;
8671 }
8672
8673 /* Only create the global variable for the stack protect guard if we are using
8674 the global flavor of that guard. */
8675 static tree
8676 rs6000_init_stack_protect_guard (void)
8677 {
8678 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8679 return default_stack_protect_guard ();
8680
8681 return NULL_TREE;
8682 }
8683
8684 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8685
8686 static bool
8687 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8688 {
8689 if (GET_CODE (x) == HIGH
8690 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8691 return true;
8692
8693 /* A TLS symbol in the TOC cannot contain a sum. */
8694 if (GET_CODE (x) == CONST
8695 && GET_CODE (XEXP (x, 0)) == PLUS
8696 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8697 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8698 return true;
8699
8700 /* Do not place an ELF TLS symbol in the constant pool. */
8701 return TARGET_ELF && tls_referenced_p (x);
8702 }
8703
8704 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8705 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8706 can be addressed relative to the toc pointer. */
8707
8708 static bool
8709 use_toc_relative_ref (rtx sym, machine_mode mode)
8710 {
8711 return ((constant_pool_expr_p (sym)
8712 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8713 get_pool_mode (sym)))
8714 || (TARGET_CMODEL == CMODEL_MEDIUM
8715 && SYMBOL_REF_LOCAL_P (sym)
8716 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8717 }
8718
8719 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8720 that is a valid memory address for an instruction.
8721 The MODE argument is the machine mode for the MEM expression
8722 that wants to use this address.
8723
8724 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
8725 refers to a constant pool entry of an address (or the sum of it
8726 plus a constant), a short (16-bit signed) constant plus a register,
8727 the sum of two registers, or a register indirect, possibly with an
8728 auto-increment. For DFmode, DDmode and DImode with a constant plus
8729 register, we must ensure that both words are addressable or PowerPC64
8730 with offset word aligned.
8731
8732 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8733 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8734 because adjacent memory cells are accessed by adding word-sized offsets
8735 during assembly output. */
8736 static bool
8737 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8738 {
8739 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8740 bool quad_offset_p = mode_supports_dq_form (mode);
8741
8742 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8743 if (VECTOR_MEM_ALTIVEC_P (mode)
8744 && GET_CODE (x) == AND
8745 && CONST_INT_P (XEXP (x, 1))
8746 && INTVAL (XEXP (x, 1)) == -16)
8747 x = XEXP (x, 0);
8748
8749 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8750 return 0;
8751 if (legitimate_indirect_address_p (x, reg_ok_strict))
8752 return 1;
8753 if (TARGET_UPDATE
8754 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8755 && mode_supports_pre_incdec_p (mode)
8756 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8757 return 1;
8758 /* Handle restricted vector d-form offsets in ISA 3.0. */
8759 if (quad_offset_p)
8760 {
8761 if (quad_address_p (x, mode, reg_ok_strict))
8762 return 1;
8763 }
8764 else if (virtual_stack_registers_memory_p (x))
8765 return 1;
8766
8767 else if (reg_offset_p)
8768 {
8769 if (legitimate_small_data_p (mode, x))
8770 return 1;
8771 if (legitimate_constant_pool_address_p (x, mode,
8772 reg_ok_strict || lra_in_progress))
8773 return 1;
8774 }
8775
8776 /* For TImode, if we have TImode in VSX registers, only allow register
8777 indirect addresses. This will allow the values to go in either GPRs
8778 or VSX registers without reloading. The vector types would tend to
8779 go into VSX registers, so we allow REG+REG, while TImode seems
8780 somewhat split, in that some uses are GPR based, and some VSX based. */
8781 /* FIXME: We could loosen this by changing the following to
8782 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8783 but currently we cannot allow REG+REG addressing for TImode. See
8784 PR72827 for complete details on how this ends up hoodwinking DSE. */
8785 if (mode == TImode && TARGET_VSX)
8786 return 0;
8787 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8788 if (! reg_ok_strict
8789 && reg_offset_p
8790 && GET_CODE (x) == PLUS
8791 && REG_P (XEXP (x, 0))
8792 && (XEXP (x, 0) == virtual_stack_vars_rtx
8793 || XEXP (x, 0) == arg_pointer_rtx)
8794 && CONST_INT_P (XEXP (x, 1)))
8795 return 1;
8796 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8797 return 1;
8798 if (!FLOAT128_2REG_P (mode)
8799 && (TARGET_HARD_FLOAT
8800 || TARGET_POWERPC64
8801 || (mode != DFmode && mode != DDmode))
8802 && (TARGET_POWERPC64 || mode != DImode)
8803 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8804 && mode != PTImode
8805 && !avoiding_indexed_address_p (mode)
8806 && legitimate_indexed_address_p (x, reg_ok_strict))
8807 return 1;
8808 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8809 && mode_supports_pre_modify_p (mode)
8810 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8811 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8812 reg_ok_strict, false)
8813 || (!avoiding_indexed_address_p (mode)
8814 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8815 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8816 return 1;
8817 if (reg_offset_p && !quad_offset_p
8818 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8819 return 1;
8820 return 0;
8821 }
8822
8823 /* Debug version of rs6000_legitimate_address_p. */
8824 static bool
8825 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8826 bool reg_ok_strict)
8827 {
8828 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8829 fprintf (stderr,
8830 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8831 "strict = %d, reload = %s, code = %s\n",
8832 ret ? "true" : "false",
8833 GET_MODE_NAME (mode),
8834 reg_ok_strict,
8835 (reload_completed ? "after" : "before"),
8836 GET_RTX_NAME (GET_CODE (x)));
8837 debug_rtx (x);
8838
8839 return ret;
8840 }
8841
8842 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8843
8844 static bool
8845 rs6000_mode_dependent_address_p (const_rtx addr,
8846 addr_space_t as ATTRIBUTE_UNUSED)
8847 {
8848 return rs6000_mode_dependent_address_ptr (addr);
8849 }
8850
8851 /* Go to LABEL if ADDR (a legitimate address expression)
8852 has an effect that depends on the machine mode it is used for.
8853
8854 On the RS/6000 this is true of all integral offsets (since AltiVec and
8855 VSX modes don't allow them) and of pre-increment or pre-decrement.
8856
8857 ??? Except that due to conceptual problems in offsettable_address_p
8858 we can't really report the problems of integral offsets. So leave
8859 this assuming that the adjustable offset must be valid for the
8860 sub-words of a TFmode operand, which is what we had before. */
8861
8862 static bool
8863 rs6000_mode_dependent_address (const_rtx addr)
8864 {
8865 switch (GET_CODE (addr))
8866 {
8867 case PLUS:
8868 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8869 is considered a legitimate address before reload, so there
8870 are no offset restrictions in that case. Note that this
8871 condition is safe in strict mode because any address involving
8872 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8873 been rejected as illegitimate. */
8874 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8875 && XEXP (addr, 0) != arg_pointer_rtx
8876 && CONST_INT_P (XEXP (addr, 1)))
8877 {
8878 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
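/* The address is mode-dependent unless the offset lies in
[-0x8000, 0x7fff - N], where N is 8 for PowerPC64 (one further
doubleword accessed at offset+8) or 12 for 32-bit (further words at
offset+4, +8 and +12): every sub-word of the widest operand must
remain reachable with a 16-bit displacement. */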
8879 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
8880 }
8881 break;
8882
8883 case LO_SUM:
8884 /* Anything in the constant pool is sufficiently aligned that
8885 all bytes have the same high part address. */
8886 return !legitimate_constant_pool_address_p (addr, QImode, false);
8887
8888 /* Auto-increment cases are now treated generically in recog.c. */
8889 case PRE_MODIFY:
8890 return TARGET_UPDATE;
8891
8892 /* AND is only allowed in Altivec loads. */
8893 case AND:
8894 return true;
8895
8896 default:
8897 break;
8898 }
8899
8900 return false;
8901 }
8902
8903 /* Debug version of rs6000_mode_dependent_address. */
8904 static bool
8905 rs6000_debug_mode_dependent_address (const_rtx addr)
8906 {
8907 bool ret = rs6000_mode_dependent_address (addr);
8908
8909 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8910 ret ? "true" : "false");
8911 debug_rtx (addr);
8912
8913 return ret;
8914 }
8915
8916 /* Implement FIND_BASE_TERM. */
8917
8918 rtx
8919 rs6000_find_base_term (rtx op)
8920 {
8921 rtx base;
8922
8923 base = op;
8924 if (GET_CODE (base) == CONST)
8925 base = XEXP (base, 0);
8926 if (GET_CODE (base) == PLUS)
8927 base = XEXP (base, 0);
8928 if (GET_CODE (base) == UNSPEC)
8929 switch (XINT (base, 1))
8930 {
8931 case UNSPEC_TOCREL:
8932 case UNSPEC_MACHOPIC_OFFSET:
8933 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8934 for aliasing purposes. */
8935 return XVECEXP (base, 0, 0);
8936 }
8937
8938 return op;
8939 }
8940
8941 /* More elaborate version of recog's offsettable_memref_p predicate
8942 that works around the ??? note of rs6000_mode_dependent_address.
8943 In particular it accepts
8944
8945 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8946
8947 in 32-bit mode, that the recog predicate rejects. */
8948
8949 static bool
8950 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
8951 {
8952 bool worst_case;
8953
8954 if (!MEM_P (op))
8955 return false;
8956
8957 /* First mimic offsettable_memref_p. */
8958 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
8959 return true;
8960
8961 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
8962 the latter predicate knows nothing about the mode of the memory
8963 reference and, therefore, assumes that it is the largest supported
8964 mode (TFmode). As a consequence, legitimate offsettable memory
8965 references are rejected. rs6000_legitimate_offset_address_p contains
8966 the correct logic for the PLUS case of rs6000_mode_dependent_address,
8967 at least with a little bit of help here given that we know the
8968 actual registers used. */
8969 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
8970 || GET_MODE_SIZE (reg_mode) == 4);
8971 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
8972 strict, worst_case);
8973 }
8974
8975 /* Determine the reassociation width to be used in reassociate_bb.
8976 This takes into account how many parallel operations we
8977 can actually do of a given type, and also the latency.
8978 P8:
8979 int add/sub 6/cycle
8980 mul 2/cycle
8981 vect add/sub/mul 2/cycle
8982 fp add/sub/mul 2/cycle
8983 dfp 1/cycle
8984 */
8985
8986 static int
8987 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
8988 machine_mode mode)
8989 {
8990 switch (rs6000_tune)
8991 {
8992 case PROCESSOR_POWER8:
8993 case PROCESSOR_POWER9:
8994 case PROCESSOR_FUTURE:
8995 if (DECIMAL_FLOAT_MODE_P (mode))
8996 return 1;
8997 if (VECTOR_MODE_P (mode))
8998 return 4;
8999 if (INTEGRAL_MODE_P (mode))
9000 return 1;
9001 if (FLOAT_MODE_P (mode))
9002 return 4;
9003 break;
9004 default:
9005 break;
9006 }
9007 return 1;
9008 }
9009
9010 /* Change register usage conditional on target flags. */
9011 static void
9012 rs6000_conditional_register_usage (void)
9013 {
9014 int i;
9015
9016 if (TARGET_DEBUG_TARGET)
9017 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9018
9019 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9020 if (TARGET_64BIT)
9021 fixed_regs[13] = call_used_regs[13]
9022 = call_really_used_regs[13] = 1;
9023
9024 /* Conditionally disable FPRs. */
9025 if (TARGET_SOFT_FLOAT)
9026 for (i = 32; i < 64; i++)
9027 fixed_regs[i] = call_used_regs[i]
9028 = call_really_used_regs[i] = 1;
9029
9030 /* The TOC register is not killed across calls in a way that is
9031 visible to the compiler. */
9032 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9033 call_really_used_regs[2] = 0;
9034
9035 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9036 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9037
9038 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9039 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9040 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9041 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9042
9043 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9044 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9045 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9046 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9047
9048 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9049 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9050 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9051
9052 if (!TARGET_ALTIVEC && !TARGET_VSX)
9053 {
9054 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9055 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9056 call_really_used_regs[VRSAVE_REGNO] = 1;
9057 }
9058
9059 if (TARGET_ALTIVEC || TARGET_VSX)
9060 global_regs[VSCR_REGNO] = 1;
9061
9062 if (TARGET_ALTIVEC_ABI)
9063 {
9064 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9065 call_used_regs[i] = call_really_used_regs[i] = 1;
9066
9067 /* AIX reserves VR20:31 in non-extended ABI mode. */
9068 if (TARGET_XCOFF)
9069 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9070 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9071 }
9072 }
9073
9074 \f
9075 /* Output insns to set DEST equal to the constant SOURCE as a series of
9076 lis, ori and shl instructions and return TRUE. */
9077
9078 bool
9079 rs6000_emit_set_const (rtx dest, rtx source)
9080 {
9081 machine_mode mode = GET_MODE (dest);
9082 rtx temp, set;
9083 rtx_insn *insn;
9084 HOST_WIDE_INT c;
9085
9086 gcc_checking_assert (CONST_INT_P (source));
9087 c = INTVAL (source);
9088 switch (mode)
9089 {
9090 case E_QImode:
9091 case E_HImode:
9092 emit_insn (gen_rtx_SET (dest, source));
9093 return true;
9094
9095 case E_SImode:
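/* Two insns: set TEMP to the high 16 bits (which assembles to lis),
then OR in the low 16 bits (ori) to form the full 32-bit constant. */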
9096 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9097
9098 emit_insn (gen_rtx_SET (copy_rtx (temp),
9099 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9100 emit_insn (gen_rtx_SET (dest,
9101 gen_rtx_IOR (SImode, copy_rtx (temp),
9102 GEN_INT (c & 0xffff))));
9103 break;
9104
9105 case E_DImode:
9106 if (!TARGET_POWERPC64)
9107 {
9108 rtx hi, lo;
9109
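/* Without 64-bit GPRs, split the constant into two SImode word moves,
high word first. */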
9110 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9111 DImode);
9112 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9113 DImode);
9114 emit_move_insn (hi, GEN_INT (c >> 32));
9115 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9116 emit_move_insn (lo, GEN_INT (c));
9117 }
9118 else
9119 rs6000_emit_set_long_const (dest, c);
9120 break;
9121
9122 default:
9123 gcc_unreachable ();
9124 }
9125
9126 insn = get_last_insn ();
9127 set = single_set (insn);
9128 if (! CONSTANT_P (SET_SRC (set)))
9129 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9130
9131 return true;
9132 }
9133
9134 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9135 Output insns to set DEST equal to the constant C as a series of
9136 lis, ori and shl instructions. */
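/* For example (an illustrative sketch of the generated sequence, not
literal assembler output), the general case where all four 16-bit
groups ud4..ud1 are nonzero, such as C = 0x123456789abcdef0, expands as:

lis rT, 0x1234 # load ud4 into bits 16..31, sign-extended
ori rT, rT, 0x5678 # OR in ud3
sldi rT, rT, 32 # shift into the high 32 bits
oris rT, rT, 0x9abc # OR in ud2 << 16
ori rD, rT, 0xdef0 # final OR of ud1 writes DEST */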
9137
9138 static void
9139 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9140 {
9141 rtx temp;
9142 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9143
9144 ud1 = c & 0xffff;
9145 c = c >> 16;
9146 ud2 = c & 0xffff;
9147 c = c >> 16;
9148 ud3 = c & 0xffff;
9149 c = c >> 16;
9150 ud4 = c & 0xffff;
9151
9152 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9153 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9154 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9155
9156 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9157 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9158 {
9159 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9160
9161 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9162 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9163 if (ud1 != 0)
9164 emit_move_insn (dest,
9165 gen_rtx_IOR (DImode, copy_rtx (temp),
9166 GEN_INT (ud1)));
9167 }
9168 else if (ud3 == 0 && ud4 == 0)
9169 {
9170 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9171
9172 gcc_assert (ud2 & 0x8000);
9173 emit_move_insn (copy_rtx (temp),
9174 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9175 if (ud1 != 0)
9176 emit_move_insn (copy_rtx (temp),
9177 gen_rtx_IOR (DImode, copy_rtx (temp),
9178 GEN_INT (ud1)));
9179 emit_move_insn (dest,
9180 gen_rtx_ZERO_EXTEND (DImode,
9181 gen_lowpart (SImode,
9182 copy_rtx (temp))));
9183 }
9184 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9185 || (ud4 == 0 && ! (ud3 & 0x8000)))
9186 {
9187 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9188
9189 emit_move_insn (copy_rtx (temp),
9190 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9191 if (ud2 != 0)
9192 emit_move_insn (copy_rtx (temp),
9193 gen_rtx_IOR (DImode, copy_rtx (temp),
9194 GEN_INT (ud2)));
9195 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9196 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9197 GEN_INT (16)));
9198 if (ud1 != 0)
9199 emit_move_insn (dest,
9200 gen_rtx_IOR (DImode, copy_rtx (temp),
9201 GEN_INT (ud1)));
9202 }
9203 else
9204 {
9205 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9206
9207 emit_move_insn (copy_rtx (temp),
9208 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9209 if (ud3 != 0)
9210 emit_move_insn (copy_rtx (temp),
9211 gen_rtx_IOR (DImode, copy_rtx (temp),
9212 GEN_INT (ud3)));
9213
9214 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9215 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9216 GEN_INT (32)));
9217 if (ud2 != 0)
9218 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9219 gen_rtx_IOR (DImode, copy_rtx (temp),
9220 GEN_INT (ud2 << 16)));
9221 if (ud1 != 0)
9222 emit_move_insn (dest,
9223 gen_rtx_IOR (DImode, copy_rtx (temp),
9224 GEN_INT (ud1)));
9225 }
9226 }
9227
9228 /* Helper for the following. Get rid of [r+r] memory refs
9229 in cases where they won't work (TImode, TFmode, TDmode, PTImode). */
9230
9231 static void
9232 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9233 {
9234 if (MEM_P (operands[0])
9235 && !REG_P (XEXP (operands[0], 0))
9236 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9237 GET_MODE (operands[0]), false))
9238 operands[0]
9239 = replace_equiv_address (operands[0],
9240 copy_addr_to_reg (XEXP (operands[0], 0)));
9241
9242 if (MEM_P (operands[1])
9243 && !REG_P (XEXP (operands[1], 0))
9244 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9245 GET_MODE (operands[1]), false))
9246 operands[1]
9247 = replace_equiv_address (operands[1],
9248 copy_addr_to_reg (XEXP (operands[1], 0)));
9249 }
9250
9251 /* Generate a vector of constants to permute MODE for a little-endian
9252 storage operation by swapping the two halves of a vector. */
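/* For example, for V4SImode the result is { 2, 3, 0, 1 }: element I
selects input lane (I + N/2) mod N, exchanging the two 64-bit halves. */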
9253 static rtvec
9254 rs6000_const_vec (machine_mode mode)
9255 {
9256 int i, subparts;
9257 rtvec v;
9258
9259 switch (mode)
9260 {
9261 case E_V1TImode:
9262 subparts = 1;
9263 break;
9264 case E_V2DFmode:
9265 case E_V2DImode:
9266 subparts = 2;
9267 break;
9268 case E_V4SFmode:
9269 case E_V4SImode:
9270 subparts = 4;
9271 break;
9272 case E_V8HImode:
9273 subparts = 8;
9274 break;
9275 case E_V16QImode:
9276 subparts = 16;
9277 break;
9278 default:
9279 gcc_unreachable();
9280 }
9281
9282 v = rtvec_alloc (subparts);
9283
9284 for (i = 0; i < subparts / 2; ++i)
9285 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9286 for (i = subparts / 2; i < subparts; ++i)
9287 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9288
9289 return v;
9290 }
9291
9292 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9293 store operation. */
9294 void
9295 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9296 {
9297 /* Scalar permutations are easier to express in integer modes rather than
9298 floating-point modes, so cast them here. We use V1TImode instead
9299 of TImode to ensure that the values don't go through GPRs. */
9300 if (FLOAT128_VECTOR_P (mode))
9301 {
9302 dest = gen_lowpart (V1TImode, dest);
9303 source = gen_lowpart (V1TImode, source);
9304 mode = V1TImode;
9305 }
9306
9307 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9308 scalar. */
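/* Rotating the 128-bit value left by 64 bits exchanges its two
doublewords, which is the entire permutation in the single-scalar case. */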
9309 if (mode == TImode || mode == V1TImode)
9310 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9311 GEN_INT (64))));
9312 else
9313 {
9314 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9315 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9316 }
9317 }
9318
9319 /* Emit a little-endian load from vector memory location SOURCE to VSX
9320 register DEST in mode MODE. The load is done with two permuting
9321 insns that represent an lxvd2x and an xxpermdi. */
9322 void
9323 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9324 {
9325 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9326 V1TImode). */
9327 if (mode == TImode || mode == V1TImode)
9328 {
9329 mode = V2DImode;
9330 dest = gen_lowpart (V2DImode, dest);
9331 source = adjust_address (source, V2DImode, 0);
9332 }
9333
9334 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9335 rs6000_emit_le_vsx_permute (tmp, source, mode);
9336 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9337 }
9338
9339 /* Emit a little-endian store to vector memory location DEST from VSX
9340 register SOURCE in mode MODE. The store is done with two permuting
9341 insns that represent an xxpermdi and an stxvd2x. */
9342 void
9343 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9344 {
9345 /* This should never be called during or after LRA, because it does
9346 not re-permute the source register. It is intended only for use
9347 during expand. */
9348 gcc_assert (!lra_in_progress && !reload_completed);
9349
9350 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9351 V1TImode). */
9352 if (mode == TImode || mode == V1TImode)
9353 {
9354 mode = V2DImode;
9355 dest = adjust_address (dest, V2DImode, 0);
9356 source = gen_lowpart (V2DImode, source);
9357 }
9358
9359 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9360 rs6000_emit_le_vsx_permute (tmp, source, mode);
9361 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9362 }
9363
9364 /* Emit a sequence representing a little-endian VSX load or store,
9365 moving data from SOURCE to DEST in mode MODE. This is done
9366 separately from rs6000_emit_move to ensure it is called only
9367 during expand. LE VSX loads and stores introduced later are
9368 handled with a split. The expand-time RTL generation allows
9369 us to optimize away redundant pairs of register-permutes. */
9370 void
9371 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9372 {
9373 gcc_assert (!BYTES_BIG_ENDIAN
9374 && VECTOR_MEM_VSX_P (mode)
9375 && !TARGET_P9_VECTOR
9376 && !gpr_or_gpr_p (dest, source)
9377 && (MEM_P (source) ^ MEM_P (dest)));
9378
9379 if (MEM_P (source))
9380 {
9381 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9382 rs6000_emit_le_vsx_load (dest, source, mode);
9383 }
9384 else
9385 {
9386 if (!REG_P (source))
9387 source = force_reg (mode, source);
9388 rs6000_emit_le_vsx_store (dest, source, mode);
9389 }
9390 }
9391
9392 /* Return whether an SFmode or SImode move can be done without converting
9393 one mode to another. This arises when we have:
9394
9395 (SUBREG:SF (REG:SI ...))
9396 (SUBREG:SI (REG:SF ...))
9397
9398 and one of the values is in a floating point/vector register, where SFmode
9399 scalars are stored in DFmode format. */
9400
9401 bool
9402 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9403 {
9404 if (TARGET_ALLOW_SF_SUBREG)
9405 return true;
9406
9407 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9408 return true;
9409
9410 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9411 return true;
9412
9413 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9414 if (SUBREG_P (dest))
9415 {
9416 rtx dest_subreg = SUBREG_REG (dest);
9417 rtx src_subreg = SUBREG_REG (src);
9418 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9419 }
9420
9421 return false;
9422 }
9423
9424
9425 /* Helper function to change moves with:
9426
9427 (SUBREG:SF (REG:SI)) and
9428 (SUBREG:SI (REG:SF))
9429
9430 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9431 values are stored as DFmode values in the VSX registers. We need to convert
9432 the bits before we can use a direct move or operate on the bits in the
9433 vector register as an integer type.
9434
9435 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
9436
9437 static bool
9438 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9439 {
9440 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9441 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9442 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9443 {
9444 rtx inner_source = SUBREG_REG (source);
9445 machine_mode inner_mode = GET_MODE (inner_source);
9446
9447 if (mode == SImode && inner_mode == SFmode)
9448 {
9449 emit_insn (gen_movsi_from_sf (dest, inner_source));
9450 return true;
9451 }
9452
9453 if (mode == SFmode && inner_mode == SImode)
9454 {
9455 emit_insn (gen_movsf_from_si (dest, inner_source));
9456 return true;
9457 }
9458 }
9459
9460 return false;
9461 }
9462
9463 /* Emit a move from SOURCE to DEST in mode MODE. */
9464 void
9465 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9466 {
9467 rtx operands[2];
9468 operands[0] = dest;
9469 operands[1] = source;
9470
9471 if (TARGET_DEBUG_ADDR)
9472 {
9473 fprintf (stderr,
9474 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9475 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9476 GET_MODE_NAME (mode),
9477 lra_in_progress,
9478 reload_completed,
9479 can_create_pseudo_p ());
9480 debug_rtx (dest);
9481 fprintf (stderr, "source:\n");
9482 debug_rtx (source);
9483 }
9484
9485 /* Check that we get CONST_WIDE_INT only when we should. */
9486 if (CONST_WIDE_INT_P (operands[1])
9487 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9488 gcc_unreachable ();
9489
9490 #ifdef HAVE_AS_GNU_ATTRIBUTE
9491 /* If we use a long double type, set the flags in .gnu_attribute that say
9492 what the long double type is. This is to allow the linker's warning
9493 message for the wrong long double to be useful, even if the function does
9494 not do a call (for example, doing a 128-bit add on power9 if the long
9495 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
9496 are used and they aren't the default long double type. */
9497 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9498 {
9499 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9500 rs6000_passes_float = rs6000_passes_long_double = true;
9501
9502 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9503 rs6000_passes_float = rs6000_passes_long_double = true;
9504 }
9505 #endif
9506
9507 /* See if we need to special case SImode/SFmode SUBREG moves. */
9508 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9509 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9510 return;
9511
9512 /* Check if GCC is setting up a block move that will end up using FP
9513 registers as temporaries. We must make sure this is acceptable. */
9514 if (MEM_P (operands[0])
9515 && MEM_P (operands[1])
9516 && mode == DImode
9517 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9518 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9519 && ! (rs6000_slow_unaligned_access (SImode,
9520 (MEM_ALIGN (operands[0]) > 32
9521 ? 32 : MEM_ALIGN (operands[0])))
9522 || rs6000_slow_unaligned_access (SImode,
9523 (MEM_ALIGN (operands[1]) > 32
9524 ? 32 : MEM_ALIGN (operands[1]))))
9525 && ! MEM_VOLATILE_P (operands [0])
9526 && ! MEM_VOLATILE_P (operands [1]))
9527 {
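/* Split the slow unaligned DImode copy into two SImode copies at
offsets 0 and 4; the checks above ensured the SImode accesses are
not slow. */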
9528 emit_move_insn (adjust_address (operands[0], SImode, 0),
9529 adjust_address (operands[1], SImode, 0));
9530 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9531 adjust_address (copy_rtx (operands[1]), SImode, 4));
9532 return;
9533 }
9534
9535 if (can_create_pseudo_p () && MEM_P (operands[0])
9536 && !gpc_reg_operand (operands[1], mode))
9537 operands[1] = force_reg (mode, operands[1]);
9538
9539 /* Recognize the case where operand[1] is a reference to thread-local
9540 data and load its address to a register. */
9541 if (tls_referenced_p (operands[1]))
9542 {
9543 enum tls_model model;
9544 rtx tmp = operands[1];
9545 rtx addend = NULL;
9546
9547 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9548 {
9549 addend = XEXP (XEXP (tmp, 0), 1);
9550 tmp = XEXP (XEXP (tmp, 0), 0);
9551 }
9552
9553 gcc_assert (SYMBOL_REF_P (tmp));
9554 model = SYMBOL_REF_TLS_MODEL (tmp);
9555 gcc_assert (model != 0);
9556
9557 tmp = rs6000_legitimize_tls_address (tmp, model);
9558 if (addend)
9559 {
9560 tmp = gen_rtx_PLUS (mode, tmp, addend);
9561 tmp = force_operand (tmp, operands[0]);
9562 }
9563 operands[1] = tmp;
9564 }
9565
9566 /* 128-bit constant floating-point values on Darwin should really be loaded
9567 as two parts. However, this premature splitting is a problem when DFmode
9568 values can go into Altivec registers. */
9569 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9570 && !reg_addr[DFmode].scalar_in_vmx_p)
9571 {
9572 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9573 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9574 DFmode);
9575 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9576 GET_MODE_SIZE (DFmode)),
9577 simplify_gen_subreg (DFmode, operands[1], mode,
9578 GET_MODE_SIZE (DFmode)),
9579 DFmode);
9580 return;
9581 }
9582
9583 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9584 p1:SD) if p1 is not of floating point class and p0 is spilled as
9585 we can have no analogous movsd_store for this. */
9586 if (lra_in_progress && mode == DDmode
9587 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9588 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9589 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9590 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9591 {
9592 enum reg_class cl;
9593 int regno = REGNO (SUBREG_REG (operands[1]));
9594
9595 if (!HARD_REGISTER_NUM_P (regno))
9596 {
9597 cl = reg_preferred_class (regno);
9598 regno = reg_renumber[regno];
9599 if (regno < 0)
9600 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9601 }
9602 if (regno >= 0 && ! FP_REGNO_P (regno))
9603 {
9604 mode = SDmode;
9605 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9606 operands[1] = SUBREG_REG (operands[1]);
9607 }
9608 }
9609 if (lra_in_progress
9610 && mode == SDmode
9611 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9612 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9613 && (REG_P (operands[1])
9614 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9615 {
9616 int regno = reg_or_subregno (operands[1]);
9617 enum reg_class cl;
9618
9619 if (!HARD_REGISTER_NUM_P (regno))
9620 {
9621 cl = reg_preferred_class (regno);
9622 gcc_assert (cl != NO_REGS);
9623 regno = reg_renumber[regno];
9624 if (regno < 0)
9625 regno = ira_class_hard_regs[cl][0];
9626 }
9627 if (FP_REGNO_P (regno))
9628 {
9629 if (GET_MODE (operands[0]) != DDmode)
9630 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9631 emit_insn (gen_movsd_store (operands[0], operands[1]));
9632 }
9633 else if (INT_REGNO_P (regno))
9634 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9635 else
9636 gcc_unreachable();
9637 return;
9638 }
9639 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9640 p1:DD)) if p0 is not of floating point class and p1 is spilled as
9641 we can have no analogous movsd_load for this. */
9642 if (lra_in_progress && mode == DDmode
9643 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9644 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9645 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9646 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9647 {
9648 enum reg_class cl;
9649 int regno = REGNO (SUBREG_REG (operands[0]));
9650
9651 if (!HARD_REGISTER_NUM_P (regno))
9652 {
9653 cl = reg_preferred_class (regno);
9654 regno = reg_renumber[regno];
9655 if (regno < 0)
9656 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9657 }
9658 if (regno >= 0 && ! FP_REGNO_P (regno))
9659 {
9660 mode = SDmode;
9661 operands[0] = SUBREG_REG (operands[0]);
9662 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9663 }
9664 }
9665 if (lra_in_progress
9666 && mode == SDmode
9667 && (REG_P (operands[0])
9668 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9669 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9670 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9671 {
9672 int regno = reg_or_subregno (operands[0]);
9673 enum reg_class cl;
9674
9675 if (!HARD_REGISTER_NUM_P (regno))
9676 {
9677 cl = reg_preferred_class (regno);
9678 gcc_assert (cl != NO_REGS);
9679 regno = reg_renumber[regno];
9680 if (regno < 0)
9681 regno = ira_class_hard_regs[cl][0];
9682 }
9683 if (FP_REGNO_P (regno))
9684 {
9685 if (GET_MODE (operands[1]) != DDmode)
9686 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9687 emit_insn (gen_movsd_load (operands[0], operands[1]));
9688 }
9689 else if (INT_REGNO_P (regno))
9690 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9691 else
9692 gcc_unreachable();
9693 return;
9694 }
9695
9696 /* FIXME: In the long term, this switch statement should go away
9697 and be replaced by a sequence of tests based on things like
9698 mode == Pmode. */
9699 switch (mode)
9700 {
9701 case E_HImode:
9702 case E_QImode:
9703 if (CONSTANT_P (operands[1])
9704 && !CONST_INT_P (operands[1]))
9705 operands[1] = force_const_mem (mode, operands[1]);
9706 break;
9707
9708 case E_TFmode:
9709 case E_TDmode:
9710 case E_IFmode:
9711 case E_KFmode:
9712 if (FLOAT128_2REG_P (mode))
9713 rs6000_eliminate_indexed_memrefs (operands);
9714 /* fall through */
9715
9716 case E_DFmode:
9717 case E_DDmode:
9718 case E_SFmode:
9719 case E_SDmode:
9720 if (CONSTANT_P (operands[1])
9721 && ! easy_fp_constant (operands[1], mode))
9722 operands[1] = force_const_mem (mode, operands[1]);
9723 break;
9724
9725 case E_V16QImode:
9726 case E_V8HImode:
9727 case E_V4SFmode:
9728 case E_V4SImode:
9729 case E_V2DFmode:
9730 case E_V2DImode:
9731 case E_V1TImode:
9732 if (CONSTANT_P (operands[1])
9733 && !easy_vector_constant (operands[1], mode))
9734 operands[1] = force_const_mem (mode, operands[1]);
9735 break;
9736
9737 case E_SImode:
9738 case E_DImode:
9739 /* Use the default pattern for the address of ELF small data. */
9740 if (TARGET_ELF
9741 && mode == Pmode
9742 && DEFAULT_ABI == ABI_V4
9743 && (SYMBOL_REF_P (operands[1])
9744 || GET_CODE (operands[1]) == CONST)
9745 && small_data_operand (operands[1], mode))
9746 {
9747 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9748 return;
9749 }
9750
9751 if (DEFAULT_ABI == ABI_V4
9752 && mode == Pmode && mode == SImode
9753 && flag_pic == 1 && got_operand (operands[1], mode))
9754 {
9755 emit_insn (gen_movsi_got (operands[0], operands[1]));
9756 return;
9757 }
9758
9759 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9760 && TARGET_NO_TOC
9761 && ! flag_pic
9762 && mode == Pmode
9763 && CONSTANT_P (operands[1])
9764 && GET_CODE (operands[1]) != HIGH
9765 && !CONST_INT_P (operands[1]))
9766 {
9767 rtx target = (!can_create_pseudo_p ()
9768 ? operands[0]
9769 : gen_reg_rtx (mode));
9770
9771 /* If this is a function address on -mcall-aixdesc,
9772 convert it to the address of the descriptor. */
9773 if (DEFAULT_ABI == ABI_AIX
9774 && SYMBOL_REF_P (operands[1])
9775 && XSTR (operands[1], 0)[0] == '.')
9776 {
9777 const char *name = XSTR (operands[1], 0);
9778 rtx new_ref;
9779 while (*name == '.')
9780 name++;
9781 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9782 CONSTANT_POOL_ADDRESS_P (new_ref)
9783 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9784 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9785 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9786 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9787 operands[1] = new_ref;
9788 }
9789
9790 if (DEFAULT_ABI == ABI_DARWIN)
9791 {
9792 #if TARGET_MACHO
9793 if (MACHO_DYNAMIC_NO_PIC_P)
9794 {
9795 /* Take care of any required data indirection. */
9796 operands[1] = rs6000_machopic_legitimize_pic_address (
9797 operands[1], mode, operands[0]);
9798 if (operands[0] != operands[1])
9799 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9800 return;
9801 }
9802 #endif
9803 emit_insn (gen_macho_high (target, operands[1]));
9804 emit_insn (gen_macho_low (operands[0], target, operands[1]));
9805 return;
9806 }
9807
9808 emit_insn (gen_elf_high (target, operands[1]));
9809 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9810 return;
9811 }
9812
9813 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9814 and we have put it in the TOC, we just need to make a TOC-relative
9815 reference to it. */
9816 if (TARGET_TOC
9817 && SYMBOL_REF_P (operands[1])
9818 && use_toc_relative_ref (operands[1], mode))
9819 operands[1] = create_TOC_reference (operands[1], operands[0]);
9820 else if (mode == Pmode
9821 && CONSTANT_P (operands[1])
9822 && GET_CODE (operands[1]) != HIGH
9823 && ((REG_P (operands[0])
9824 && FP_REGNO_P (REGNO (operands[0])))
9825 || !CONST_INT_P (operands[1])
9826 || (num_insns_constant (operands[1], mode)
9827 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9828 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
9829 && (TARGET_CMODEL == CMODEL_SMALL
9830 || can_create_pseudo_p ()
9831 || (REG_P (operands[0])
9832 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9833 {
9834
9835 #if TARGET_MACHO
9836 /* Darwin uses a special PIC legitimizer. */
9837 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9838 {
9839 operands[1] =
9840 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9841 operands[0]);
9842 if (operands[0] != operands[1])
9843 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9844 return;
9845 }
9846 #endif
9847
9848 /* If we are to limit the number of things we put in the TOC and
9849 this is a symbol plus a constant we can add in one insn,
9850 just put the symbol in the TOC and add the constant. */
9851 if (GET_CODE (operands[1]) == CONST
9852 && TARGET_NO_SUM_IN_TOC
9853 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9854 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9855 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9856 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
9857 && ! side_effects_p (operands[0]))
9858 {
9859 rtx sym =
9860 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9861 rtx other = XEXP (XEXP (operands[1], 0), 1);
9862
9863 sym = force_reg (mode, sym);
9864 emit_insn (gen_add3_insn (operands[0], sym, other));
9865 return;
9866 }
9867
9868 operands[1] = force_const_mem (mode, operands[1]);
9869
9870 if (TARGET_TOC
9871 && SYMBOL_REF_P (XEXP (operands[1], 0))
9872 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
9873 {
9874 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9875 operands[0]);
9876 operands[1] = gen_const_mem (mode, tocref);
9877 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9878 }
9879 }
9880 break;
9881
9882 case E_TImode:
9883 if (!VECTOR_MEM_VSX_P (TImode))
9884 rs6000_eliminate_indexed_memrefs (operands);
9885 break;
9886
9887 case E_PTImode:
9888 rs6000_eliminate_indexed_memrefs (operands);
9889 break;
9890
9891 default:
9892 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9893 }
9894
9895 /* Above, we may have called force_const_mem which may have returned
9896 an invalid address. If we can, fix this up; otherwise, reload will
9897 have to deal with it. */
9898 if (MEM_P (operands[1]))
9899 operands[1] = validize_mem (operands[1]);
9900
9901 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9902 }
9903 \f
9904 /* Nonzero if we can use a floating-point register to pass this arg. */
9905 #define USE_FP_FOR_ARG_P(CUM,MODE) \
9906 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
9907 && (CUM)->fregno <= FP_ARG_MAX_REG \
9908 && TARGET_HARD_FLOAT)
9909
9910 /* Nonzero if we can use an AltiVec register to pass this arg. */
9911 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
9912 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
9913 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
9914 && TARGET_ALTIVEC_ABI \
9915 && (NAMED))
9916
9917 /* Walk down the type tree of TYPE counting consecutive base elements.
9918 If *MODEP is VOIDmode, then set it to the first valid floating point
9919 or vector type. If a non-floating point or vector type is found, or
9920 if a floating point or vector type that doesn't match a non-VOIDmode
9921 *MODEP is found, then return -1, otherwise return the count in the
9922 sub-tree. */
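/* For example, 'struct { double x, y; }' yields 2 with *MODEP == DFmode,
while 'struct { double d; float f; }' yields -1 since the field modes
differ. */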
9923
9924 static int
9925 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
9926 {
9927 machine_mode mode;
9928 HOST_WIDE_INT size;
9929
9930 switch (TREE_CODE (type))
9931 {
9932 case REAL_TYPE:
9933 mode = TYPE_MODE (type);
9934 if (!SCALAR_FLOAT_MODE_P (mode))
9935 return -1;
9936
9937 if (*modep == VOIDmode)
9938 *modep = mode;
9939
9940 if (*modep == mode)
9941 return 1;
9942
9943 break;
9944
9945 case COMPLEX_TYPE:
9946 mode = TYPE_MODE (TREE_TYPE (type));
9947 if (!SCALAR_FLOAT_MODE_P (mode))
9948 return -1;
9949
9950 if (*modep == VOIDmode)
9951 *modep = mode;
9952
9953 if (*modep == mode)
9954 return 2;
9955
9956 break;
9957
9958 case VECTOR_TYPE:
9959 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
9960 return -1;
9961
9962 /* Use V4SImode as representative of all 128-bit vector types. */
9963 size = int_size_in_bytes (type);
9964 switch (size)
9965 {
9966 case 16:
9967 mode = V4SImode;
9968 break;
9969 default:
9970 return -1;
9971 }
9972
9973 if (*modep == VOIDmode)
9974 *modep = mode;
9975
9976 /* Vector modes are considered to be opaque: two vectors are
9977 equivalent for the purposes of being homogeneous aggregates
9978 if they are the same size. */
9979 if (*modep == mode)
9980 return 1;
9981
9982 break;
9983
9984 case ARRAY_TYPE:
9985 {
9986 int count;
9987 tree index = TYPE_DOMAIN (type);
9988
9989 /* Can't handle incomplete types nor sizes that are not
9990 fixed. */
9991 if (!COMPLETE_TYPE_P (type)
9992 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9993 return -1;
9994
9995 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
9996 if (count == -1
9997 || !index
9998 || !TYPE_MAX_VALUE (index)
9999 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10000 || !TYPE_MIN_VALUE (index)
10001 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10002 || count < 0)
10003 return -1;
10004
10005 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10006 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10007
10008 /* There must be no padding. */
10009 if (wi::to_wide (TYPE_SIZE (type))
10010 != count * GET_MODE_BITSIZE (*modep))
10011 return -1;
10012
10013 return count;
10014 }
10015
10016 case RECORD_TYPE:
10017 {
10018 int count = 0;
10019 int sub_count;
10020 tree field;
10021
10022 /* Can't handle incomplete types nor sizes that are not
10023 fixed. */
10024 if (!COMPLETE_TYPE_P (type)
10025 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10026 return -1;
10027
10028 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10029 {
10030 if (TREE_CODE (field) != FIELD_DECL)
10031 continue;
10032
10033 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10034 if (sub_count < 0)
10035 return -1;
10036 count += sub_count;
10037 }
10038
10039 /* There must be no padding. */
10040 if (wi::to_wide (TYPE_SIZE (type))
10041 != count * GET_MODE_BITSIZE (*modep))
10042 return -1;
10043
10044 return count;
10045 }
10046
10047 case UNION_TYPE:
10048 case QUAL_UNION_TYPE:
10049 {
10050 /* These aren't very interesting except in a degenerate case. */
10051 int count = 0;
10052 int sub_count;
10053 tree field;
10054
10055 /* Can't handle incomplete types nor sizes that are not
10056 fixed. */
10057 if (!COMPLETE_TYPE_P (type)
10058 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10059 return -1;
10060
10061 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10062 {
10063 if (TREE_CODE (field) != FIELD_DECL)
10064 continue;
10065
10066 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10067 if (sub_count < 0)
10068 return -1;
10069 count = count > sub_count ? count : sub_count;
10070 }
10071
10072 /* There must be no padding. */
10073 if (wi::to_wide (TYPE_SIZE (type))
10074 != count * GET_MODE_BITSIZE (*modep))
10075 return -1;
10076
10077 return count;
10078 }
10079
10080 default:
10081 break;
10082 }
10083
10084 return -1;
10085 }
10086
10087 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10088 float or vector aggregate that shall be passed in FP/vector registers
10089 according to the ELFv2 ABI, return the homogeneous element mode in
10090 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10091
10092 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
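/* For instance, under the ELFv2 ABI with AltiVec enabled,
'struct { vector int a, b; }' is a homogeneous aggregate with
*ELT_MODE == V4SImode and *N_ELTS == 2. */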
10093
10094 static bool
10095 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10096 machine_mode *elt_mode,
10097 int *n_elts)
10098 {
10099 /* Note that we do not accept complex types at the top level as
10100 homogeneous aggregates; these types are handled via the
10101 targetm.calls.split_complex_arg mechanism. Complex types
10102 can be elements of homogeneous aggregates, however. */
10103 if (TARGET_HARD_FLOAT && DEFAULT_ABI == ABI_ELFv2 && type
10104 && AGGREGATE_TYPE_P (type))
10105 {
10106 machine_mode field_mode = VOIDmode;
10107 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10108
10109 if (field_count > 0)
10110 {
10111 int reg_size = ALTIVEC_OR_VSX_VECTOR_MODE (field_mode) ? 16 : 8;
10112 int field_size = ROUND_UP (GET_MODE_SIZE (field_mode), reg_size);
10113
10114 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10115 up to AGGR_ARG_NUM_REG registers. */
10116 if (field_count * field_size <= AGGR_ARG_NUM_REG * reg_size)
10117 {
10118 if (elt_mode)
10119 *elt_mode = field_mode;
10120 if (n_elts)
10121 *n_elts = field_count;
10122 return true;
10123 }
10124 }
10125 }
10126
10127 if (elt_mode)
10128 *elt_mode = mode;
10129 if (n_elts)
10130 *n_elts = 1;
10131 return false;
10132 }
10133
10134 /* Return a nonzero value to say to return the function value in
10135 memory, just as large structures are always returned. TYPE will be
10136 the data type of the value, and FNTYPE will be the type of the
10137 function doing the returning, or @code{NULL} for libcalls.
10138
10139 The AIX ABI for the RS/6000 specifies that all structures are
10140 returned in memory. The Darwin ABI does the same.
10141
10142 For the Darwin 64 Bit ABI, a function result can be returned in
10143 registers or in memory, depending on the size of the return data
10144 type. If it is returned in registers, the value occupies the same
10145 registers as it would if it were the first and only function
10146 argument. Otherwise, the function places its result in memory at
10147 the location pointed to by GPR3.
10148
10149 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10150 but a draft put them in memory, and GCC used to implement the draft
10151 instead of the final standard. Therefore, aix_struct_return
10152 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10153 compatibility can change DRAFT_V4_STRUCT_RET to override the
10154 default, and -m switches get the final word. See
10155 rs6000_option_override_internal for more details.
10156
10157 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10158 long double support is enabled. These values are returned in memory.
10159
10160 int_size_in_bytes returns -1 for variable size objects, which go in
10161 memory always. The cast to unsigned makes -1 > 8. */
10162
10163 static bool
10164 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10165 {
10166 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10167 if (TARGET_MACHO
10168 && rs6000_darwin64_abi
10169 && TREE_CODE (type) == RECORD_TYPE
10170 && int_size_in_bytes (type) > 0)
10171 {
10172 CUMULATIVE_ARGS valcum;
10173 rtx valret;
10174
10175 valcum.words = 0;
10176 valcum.fregno = FP_ARG_MIN_REG;
10177 valcum.vregno = ALTIVEC_ARG_MIN_REG;
10178 /* Do a trial code generation as if this were going to be passed
10179 as an argument; if any part goes in memory, we return NULL. */
10180 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
10181 if (valret)
10182 return false;
10183 /* Otherwise fall through to more conventional ABI rules. */
10184 }
10185
10186 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
10187 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
10188 NULL, NULL))
10189 return false;
10190
10191 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
10192 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
10193 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
10194 return false;
10195
10196 if (AGGREGATE_TYPE_P (type)
10197 && (aix_struct_return
10198 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
10199 return true;
10200
10201 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10202 modes only exist for GCC vector types if -maltivec. */
10203 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
10204 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10205 return false;
10206
10207 /* Return synthetic vectors in memory. */
10208 if (TREE_CODE (type) == VECTOR_TYPE
10209 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10210 {
10211 static bool warned_for_return_big_vectors = false;
10212 if (!warned_for_return_big_vectors)
10213 {
10214 warning (OPT_Wpsabi, "GCC vector returned by reference: "
10215 "non-standard ABI extension with no compatibility "
10216 "guarantee");
10217 warned_for_return_big_vectors = true;
10218 }
10219 return true;
10220 }
10221
10222 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10223 && FLOAT128_IEEE_P (TYPE_MODE (type)))
10224 return true;
10225
10226 return false;
10227 }
10228
10229 /* Specify whether values returned in registers should be at the most
10230 significant end of a register. We want aggregates returned by
10231 value to match the way aggregates are passed to functions. */
10232
10233 static bool
10234 rs6000_return_in_msb (const_tree valtype)
10235 {
10236 return (DEFAULT_ABI == ABI_ELFv2
10237 && BYTES_BIG_ENDIAN
10238 && AGGREGATE_TYPE_P (valtype)
10239 && (rs6000_function_arg_padding (TYPE_MODE (valtype), valtype)
10240 == PAD_UPWARD));
10241 }
10242
10243 #ifdef HAVE_AS_GNU_ATTRIBUTE
10244 /* Return TRUE if a call to function FNDECL may be one that
10245 potentially affects the function calling ABI of the object file. */
10246
10247 static bool
10248 call_ABI_of_interest (tree fndecl)
10249 {
10250 if (rs6000_gnu_attr && symtab->state == EXPANSION)
10251 {
10252 struct cgraph_node *c_node;
10253
10254 /* Libcalls are always interesting. */
10255 if (fndecl == NULL_TREE)
10256 return true;
10257
10258 /* Any call to an external function is interesting. */
10259 if (DECL_EXTERNAL (fndecl))
10260 return true;
10261
10262 /* Interesting functions that we are emitting in this object file. */
10263 c_node = cgraph_node::get (fndecl);
10264 c_node = c_node->ultimate_alias_target ();
10265 return !c_node->only_called_directly_p ();
10266 }
10267 return false;
10268 }
10269 #endif
10270
10271 /* Initialize a variable CUM of type CUMULATIVE_ARGS
10272 for a call to a function whose data type is FNTYPE.
10273 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
10274
10275 For incoming args we set the number of arguments in the prototype large
10276 so we never return a PARALLEL. */
10277
10278 void
10279 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
10280 rtx libname ATTRIBUTE_UNUSED, int incoming,
10281 int libcall, int n_named_args,
10282 tree fndecl,
10283 machine_mode return_mode ATTRIBUTE_UNUSED)
10284 {
10285 static CUMULATIVE_ARGS zero_cumulative;
10286
10287 *cum = zero_cumulative;
10288 cum->words = 0;
10289 cum->fregno = FP_ARG_MIN_REG;
10290 cum->vregno = ALTIVEC_ARG_MIN_REG;
10291 cum->prototype = (fntype && prototype_p (fntype));
10292 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
10293 ? CALL_LIBCALL : CALL_NORMAL);
10294 cum->sysv_gregno = GP_ARG_MIN_REG;
10295 cum->stdarg = stdarg_p (fntype);
10296 cum->libcall = libcall;
10297
10298 cum->nargs_prototype = 0;
10299 if (incoming || cum->prototype)
10300 cum->nargs_prototype = n_named_args;
10301
10302 /* Check for a longcall attribute. */
10303 if ((!fntype && rs6000_default_long_calls)
10304 || (fntype
10305 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
10306 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
10307 cum->call_cookie |= CALL_LONG;
10308 else if (DEFAULT_ABI != ABI_DARWIN)
10309 {
10310 bool is_local = (fndecl
10311 && !DECL_EXTERNAL (fndecl)
10312 && !DECL_WEAK (fndecl)
10313 && (*targetm.binds_local_p) (fndecl));
10314 if (is_local)
10315 ;
10316 else if (flag_plt)
10317 {
10318 if (fntype
10319 && lookup_attribute ("noplt", TYPE_ATTRIBUTES (fntype)))
10320 cum->call_cookie |= CALL_LONG;
10321 }
10322 else
10323 {
10324 if (!(fntype
10325 && lookup_attribute ("plt", TYPE_ATTRIBUTES (fntype))))
10326 cum->call_cookie |= CALL_LONG;
10327 }
10328 }
10329
10330 if (TARGET_DEBUG_ARG)
10331 {
10332 fprintf (stderr, "\ninit_cumulative_args:");
10333 if (fntype)
10334 {
10335 tree ret_type = TREE_TYPE (fntype);
10336 fprintf (stderr, " ret code = %s,",
10337 get_tree_code_name (TREE_CODE (ret_type)));
10338 }
10339
10340 if (cum->call_cookie & CALL_LONG)
10341 fprintf (stderr, " longcall,");
10342
10343 fprintf (stderr, " proto = %d, nargs = %d\n",
10344 cum->prototype, cum->nargs_prototype);
10345 }
10346
10347 #ifdef HAVE_AS_GNU_ATTRIBUTE
10348 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
10349 {
10350 cum->escapes = call_ABI_of_interest (fndecl);
10351 if (cum->escapes)
10352 {
10353 tree return_type;
10354
10355 if (fntype)
10356 {
10357 return_type = TREE_TYPE (fntype);
10358 return_mode = TYPE_MODE (return_type);
10359 }
10360 else
10361 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
10362
10363 if (return_type != NULL)
10364 {
10365 if (TREE_CODE (return_type) == RECORD_TYPE
10366 && TYPE_TRANSPARENT_AGGR (return_type))
10367 {
10368 return_type = TREE_TYPE (first_field (return_type));
10369 return_mode = TYPE_MODE (return_type);
10370 }
10371 if (AGGREGATE_TYPE_P (return_type)
10372 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
10373 <= 8))
10374 rs6000_returns_struct = true;
10375 }
10376 if (SCALAR_FLOAT_MODE_P (return_mode))
10377 {
10378 rs6000_passes_float = true;
10379 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
10380 && (FLOAT128_IBM_P (return_mode)
10381 || FLOAT128_IEEE_P (return_mode)
10382 || (return_type != NULL
10383 && (TYPE_MAIN_VARIANT (return_type)
10384 == long_double_type_node))))
10385 rs6000_passes_long_double = true;
10386
10387 /* Note if we pass or return an IEEE 128-bit type. We changed
10388 the mangling for these types, and we may need to make an alias
10389 with the old mangling. */
10390 if (FLOAT128_IEEE_P (return_mode))
10391 rs6000_passes_ieee128 = true;
10392 }
10393 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode))
10394 rs6000_passes_vector = true;
10395 }
10396 }
10397 #endif
10398
10399 if (fntype
10400 && !TARGET_ALTIVEC
10401 && TARGET_ALTIVEC_ABI
10402 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
10403 {
10404 error ("cannot return value in vector register because"
10405 " altivec instructions are disabled, use %qs"
10406 " to enable them", "-maltivec");
10407 }
10408 }
10409 \f
10410 /* The mode the ABI uses for a word. This is not the same as word_mode
10411 for -m32 -mpowerpc64. This is used to implement various target hooks. */
10412
10413 static scalar_int_mode
10414 rs6000_abi_word_mode (void)
10415 {
10416 return TARGET_32BIT ? SImode : DImode;
10417 }
10418
10419 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
10420 static char *
10421 rs6000_offload_options (void)
10422 {
10423 if (TARGET_64BIT)
10424 return xstrdup ("-foffload-abi=lp64");
10425 else
10426 return xstrdup ("-foffload-abi=ilp32");
10427 }
10428
10429 /* On rs6000, function arguments are promoted, as are function return
10430 values. */
10431
10432 static machine_mode
10433 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10434 machine_mode mode,
10435 int *punsignedp ATTRIBUTE_UNUSED,
10436 const_tree, int)
10437 {
10438 PROMOTE_MODE (mode, *punsignedp, type);
10439
10440 return mode;
10441 }
10442
10443 /* Return true if TYPE must be passed on the stack and not in registers. */
10444
10445 static bool
10446 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
10447 {
10448 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
10449 return must_pass_in_stack_var_size (mode, type);
10450 else
10451 return must_pass_in_stack_var_size_or_pad (mode, type);
10452 }
10453
10454 static inline bool
10455 is_complex_IBM_long_double (machine_mode mode)
10456 {
10457 return mode == ICmode || (mode == TCmode && FLOAT128_IBM_P (TCmode));
10458 }
10459
10460 /* Whether ABI_V4 passes MODE args to a function in floating point
10461 registers. */
10462
10463 static bool
10464 abi_v4_pass_in_fpr (machine_mode mode, bool named)
10465 {
10466 if (!TARGET_HARD_FLOAT)
10467 return false;
10468 if (mode == DFmode)
10469 return true;
10470 if (mode == SFmode && named)
10471 return true;
10472 /* ABI_V4 passes complex IBM long double in 8 gprs.
10473 Stupid, but we can't change the ABI now. */
10474 if (is_complex_IBM_long_double (mode))
10475 return false;
10476 if (FLOAT128_2REG_P (mode))
10477 return true;
10478 if (DECIMAL_FLOAT_MODE_P (mode))
10479 return true;
10480 return false;
10481 }
10482
10483 /* Implement TARGET_FUNCTION_ARG_PADDING.
10484
10485 For the AIX ABI structs are always stored left shifted in their
10486 argument slot. */
10487
10488 static pad_direction
10489 rs6000_function_arg_padding (machine_mode mode, const_tree type)
10490 {
10491 #ifndef AGGREGATE_PADDING_FIXED
10492 #define AGGREGATE_PADDING_FIXED 0
10493 #endif
10494 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
10495 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
10496 #endif
10497
10498 if (!AGGREGATE_PADDING_FIXED)
10499 {
10500 /* GCC used to pass structures of the same size as integer types as
10501 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
10502 That is, structures of size 1 or 2 (or 4 when TARGET_64BIT) were
10503 passed padded downward, except that -mstrict-align further
10504 muddied the water in that multi-component structures of 2 and 4
10505 bytes in size were passed padded upward.
10506
10507 The following arranges for best compatibility with previous
10508 versions of gcc, but removes the -mstrict-align dependency. */
10509 if (BYTES_BIG_ENDIAN)
10510 {
10511 HOST_WIDE_INT size = 0;
10512
10513 if (mode == BLKmode)
10514 {
10515 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
10516 size = int_size_in_bytes (type);
10517 }
10518 else
10519 size = GET_MODE_SIZE (mode);
10520
10521 if (size == 1 || size == 2 || size == 4)
10522 return PAD_DOWNWARD;
10523 }
10524 return PAD_UPWARD;
10525 }
10526
10527 if (AGGREGATES_PAD_UPWARD_ALWAYS)
10528 {
10529 if (type != 0 && AGGREGATE_TYPE_P (type))
10530 return PAD_UPWARD;
10531 }
10532
10533 /* Fall back to the default. */
10534 return default_function_arg_padding (mode, type);
10535 }
10536
10537 /* If defined, a C expression that gives the alignment boundary, in bits,
10538 of an argument with the specified mode and type. If it is not defined,
10539 PARM_BOUNDARY is used for all arguments.
10540
10541 V.4 wants long longs and doubles to be double word aligned. Just
10542 testing the mode size is a boneheaded way to do this as it means
10543 that other types such as complex int are also double word aligned.
10544 However, we're stuck with this because changing the ABI might break
10545 existing library interfaces.
10546
10547 Quadword align Altivec/VSX vectors.
10548 Quadword align large synthetic vector types. */
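/* For example, under ABI_V4 a DFmode argument (size 8) gets 64-bit
   alignment from the first test below, while any AltiVec or VSX vector
   argument gets 128-bit alignment. */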
10549
10550 static unsigned int
10551 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
10552 {
10553 machine_mode elt_mode;
10554 int n_elts;
10555
10556 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10557
10558 if (DEFAULT_ABI == ABI_V4
10559 && (GET_MODE_SIZE (mode) == 8
10560 || (TARGET_HARD_FLOAT
10561 && !is_complex_IBM_long_double (mode)
10562 && FLOAT128_2REG_P (mode))))
10563 return 64;
10564 else if (FLOAT128_VECTOR_P (mode))
10565 return 128;
10566 else if (type && TREE_CODE (type) == VECTOR_TYPE
10567 && int_size_in_bytes (type) >= 8
10568 && int_size_in_bytes (type) < 16)
10569 return 64;
10570 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10571 || (type && TREE_CODE (type) == VECTOR_TYPE
10572 && int_size_in_bytes (type) >= 16))
10573 return 128;
10574
10575 /* Aggregate types that need > 8 byte alignment are quadword-aligned
10576 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
10577 -mcompat-align-parm is used. */
10578 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
10579 || DEFAULT_ABI == ABI_ELFv2)
10580 && type && TYPE_ALIGN (type) > 64)
10581 {
10582 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
10583 or homogeneous float/vector aggregates here. We already handled
10584 vector aggregates above, but still need to check for float here. */
10585 bool aggregate_p = (AGGREGATE_TYPE_P (type)
10586 && !SCALAR_FLOAT_MODE_P (elt_mode));
10587
10588 /* We used to check for BLKmode instead of the above aggregate type
10589 check. Warn when this results in any difference to the ABI. */
10590 if (aggregate_p != (mode == BLKmode))
10591 {
10592 static bool warned;
10593 if (!warned && warn_psabi)
10594 {
10595 warned = true;
10596 inform (input_location,
10597 "the ABI of passing aggregates with %d-byte alignment"
10598 " has changed in GCC 5",
10599 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
10600 }
10601 }
10602
10603 if (aggregate_p)
10604 return 128;
10605 }
10606
10607 /* Similar for the Darwin64 ABI. Note that for historical reasons we
10608 implement the "aggregate type" check as a BLKmode check here; this
10609 means certain aggregate types are in fact not aligned. */
10610 if (TARGET_MACHO && rs6000_darwin64_abi
10611 && mode == BLKmode
10612 && type && TYPE_ALIGN (type) > 64)
10613 return 128;
10614
10615 return PARM_BOUNDARY;
10616 }
10617
10618 /* The offset in words to the start of the parameter save area. */
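/* This is the fixed linkage area at the bottom of the caller's frame:
   two words (back chain and LR save) for V.4, four doublewords (back
   chain, CR, LR and TOC saves) for ELFv2, and six words for AIX/ELFv1,
   which adds two reserved words to the ELFv2 set. */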
10619
10620 static unsigned int
10621 rs6000_parm_offset (void)
10622 {
10623 return (DEFAULT_ABI == ABI_V4 ? 2
10624 : DEFAULT_ABI == ABI_ELFv2 ? 4
10625 : 6);
10626 }
10627
10628 /* For a function parm of MODE and TYPE, return the starting word in
10629 the parameter area. NWORDS of the parameter area are already used. */
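/* For example, a 16-byte aligned vector under the 64-bit AIX ABI has an
   alignment mask of 128 / 64 - 1 == 1; with the fixed offset of 6 words
   and NWORDS == 1, -(6 + 1) & 1 == 1, so one word of padding is added
   and the argument starts at word 2. */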
10630
10631 static unsigned int
10632 rs6000_parm_start (machine_mode mode, const_tree type,
10633 unsigned int nwords)
10634 {
10635 unsigned int align;
10636
10637 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
10638 return nwords + (-(rs6000_parm_offset () + nwords) & align);
10639 }
10640
10641 /* Compute the size (in words) of a function argument. */
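/* For example, a 10-byte struct occupies (10 + 3) >> 2 == 3 words when
   TARGET_32BIT and (10 + 7) >> 3 == 2 words otherwise. */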
10642
10643 static unsigned long
10644 rs6000_arg_size (machine_mode mode, const_tree type)
10645 {
10646 unsigned long size;
10647
10648 if (mode != BLKmode)
10649 size = GET_MODE_SIZE (mode);
10650 else
10651 size = int_size_in_bytes (type);
10652
10653 if (TARGET_32BIT)
10654 return (size + 3) >> 2;
10655 else
10656 return (size + 7) >> 3;
10657 }
10658 \f
10659 /* Use this to flush pending int fields. */
10660
10661 static void
10662 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
10663 HOST_WIDE_INT bitpos, int final)
10664 {
10665 unsigned int startbit, endbit;
10666 int intregs, intoffset;
10667
10668 /* Handle the situations where a float is taking up the first half
10669 of the GPR, and the other half is empty (typically due to
10670 alignment restrictions). We can detect this by an 8-byte-aligned
10671 int field, or by seeing that this is the final flush for this
10672 argument. Count the word and continue on. */
10673 if (cum->floats_in_gpr == 1
10674 && (cum->intoffset % 64 == 0
10675 || (cum->intoffset == -1 && final)))
10676 {
10677 cum->words++;
10678 cum->floats_in_gpr = 0;
10679 }
10680
10681 if (cum->intoffset == -1)
10682 return;
10683
10684 intoffset = cum->intoffset;
10685 cum->intoffset = -1;
10686 cum->floats_in_gpr = 0;
10687
10688 if (intoffset % BITS_PER_WORD != 0)
10689 {
10690 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
10691 if (!int_mode_for_size (bits, 0).exists ())
10692 {
10693 /* We couldn't find an appropriate mode, which happens,
10694 e.g., in packed structs when there are 3 bytes to load.
10695 Move intoffset back to the beginning of the word in this
10696 case. */
10697 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
10698 }
10699 }
10700
10701 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10702 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10703 intregs = (endbit - startbit) / BITS_PER_WORD;
10704 cum->words += intregs;
10705 /* words should be unsigned. */
10706 if ((unsigned) cum->words < (endbit / BITS_PER_WORD))
10707 {
10708 int pad = (endbit / BITS_PER_WORD) - cum->words;
10709 cum->words += pad;
10710 }
10711 }
10712
10713 /* The darwin64 ABI calls for us to recurse down through structs,
10714 looking for elements passed in registers. Unfortunately, we have
10715 to track int register count here also because of misalignments
10716 in powerpc alignment mode. */
10717
10718 static void
10719 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
10720 const_tree type,
10721 HOST_WIDE_INT startbitpos)
10722 {
10723 tree f;
10724
10725 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10726 if (TREE_CODE (f) == FIELD_DECL)
10727 {
10728 HOST_WIDE_INT bitpos = startbitpos;
10729 tree ftype = TREE_TYPE (f);
10730 machine_mode mode;
10731 if (ftype == error_mark_node)
10732 continue;
10733 mode = TYPE_MODE (ftype);
10734
10735 if (DECL_SIZE (f) != 0
10736 && tree_fits_uhwi_p (bit_position (f)))
10737 bitpos += int_bit_position (f);
10738
10739 /* ??? FIXME: else assume zero offset. */
10740
10741 if (TREE_CODE (ftype) == RECORD_TYPE)
10742 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
10743 else if (USE_FP_FOR_ARG_P (cum, mode))
10744 {
10745 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
10746 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10747 cum->fregno += n_fpregs;
10748 /* Single-precision floats present a special problem for
10749 us, because they are smaller than an 8-byte GPR, and so
10750 the structure-packing rules combined with the standard
10751 varargs behavior mean that we want to pack float/float
10752 and float/int combinations into a single register's
10753 space. This is complicated by the arg advance flushing,
10754 which works on arbitrarily large groups of int-type
10755 fields. */
10756 if (mode == SFmode)
10757 {
10758 if (cum->floats_in_gpr == 1)
10759 {
10760 /* Two floats in a word; count the word and reset
10761 the float count. */
10762 cum->words++;
10763 cum->floats_in_gpr = 0;
10764 }
10765 else if (bitpos % 64 == 0)
10766 {
10767 /* A float at the beginning of an 8-byte word;
10768 count it and put off adjusting cum->words until
10769 we see if a arg advance flush is going to do it
10770 for us. */
10771 cum->floats_in_gpr++;
10772 }
10773 else
10774 {
10775 /* The float is at the end of a word, preceded
10776 by integer fields, so the arg advance flush
10777 just above has already set cum->words and
10778 everything is taken care of. */
10779 }
10780 }
10781 else
10782 cum->words += n_fpregs;
10783 }
10784 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10785 {
10786 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10787 cum->vregno++;
10788 cum->words += 2;
10789 }
10790 else if (cum->intoffset == -1)
10791 cum->intoffset = bitpos;
10792 }
10793 }
10794
10795 /* Check for an item that needs to be considered specially under the Darwin
10796 64-bit ABI. These are record types where the mode is BLK or the structure is
10797 8 bytes in size. */
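/* For example, struct { float x; float y; } is 8 bytes and typically gets
   a scalar (DImode) mode, yet it still needs the field-by-field handling
   that this predicate selects. */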
10798 static int
10799 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
10800 {
10801 return rs6000_darwin64_abi
10802 && ((mode == BLKmode
10803 && TREE_CODE (type) == RECORD_TYPE
10804 && int_size_in_bytes (type) > 0)
10805 || (type && TREE_CODE (type) == RECORD_TYPE
10806 && int_size_in_bytes (type) == 8)) ? 1 : 0;
10807 }
10808
10809 /* Update the data in CUM to advance over an argument
10810 of mode MODE and data type TYPE.
10811 (TYPE is null for libcalls where that information may not be available.)
10812
10813 Note that for args passed by reference, function_arg will be called
10814 with MODE and TYPE set to that of the pointer to the arg, not the arg
10815 itself. */
10816
10817 static void
10818 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
10819 const_tree type, bool named, int depth)
10820 {
10821 machine_mode elt_mode;
10822 int n_elts;
10823
10824 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10825
10826 /* Only tick off an argument if we're not recursing. */
10827 if (depth == 0)
10828 cum->nargs_prototype--;
10829
10830 #ifdef HAVE_AS_GNU_ATTRIBUTE
10831 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
10832 && cum->escapes)
10833 {
10834 if (SCALAR_FLOAT_MODE_P (mode))
10835 {
10836 rs6000_passes_float = true;
10837 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
10838 && (FLOAT128_IBM_P (mode)
10839 || FLOAT128_IEEE_P (mode)
10840 || (type != NULL
10841 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
10842 rs6000_passes_long_double = true;
10843
10844 /* Note if we pass or return an IEEE 128-bit type. We changed the
10845 mangling for these types, and we may need to make an alias with
10846 the old mangling. */
10847 if (FLOAT128_IEEE_P (mode))
10848 rs6000_passes_ieee128 = true;
10849 }
10850 if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
10851 rs6000_passes_vector = true;
10852 }
10853 #endif
10854
10855 if (TARGET_ALTIVEC_ABI
10856 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10857 || (type && TREE_CODE (type) == VECTOR_TYPE
10858 && int_size_in_bytes (type) == 16)))
10859 {
10860 bool stack = false;
10861
10862 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10863 {
10864 cum->vregno += n_elts;
10865
10866 if (!TARGET_ALTIVEC)
10867 error ("cannot pass argument in vector register because"
10868 " altivec instructions are disabled, use %qs"
10869 " to enable them", "-maltivec");
10870
10871 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
10872 even if it is going to be passed in a vector register.
10873 Darwin does the same for variable-argument functions. */
10874 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10875 && TARGET_64BIT)
10876 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
10877 stack = true;
10878 }
10879 else
10880 stack = true;
10881
10882 if (stack)
10883 {
10884 int align;
10885
10886 /* Vector parameters must be 16-byte aligned. In 32-bit
10887 mode this means we need to take into account the offset
10888 to the parameter save area. In 64-bit mode, they just
10889 have to start on an even word, since the parameter save
10890 area is 16-byte aligned. */
10891 if (TARGET_32BIT)
10892 align = -(rs6000_parm_offset () + cum->words) & 3;
10893 else
10894 align = cum->words & 1;
10895 cum->words += align + rs6000_arg_size (mode, type);
10896
10897 if (TARGET_DEBUG_ARG)
10898 {
10899 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
10900 cum->words, align);
10901 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
10902 cum->nargs_prototype, cum->prototype,
10903 GET_MODE_NAME (mode));
10904 }
10905 }
10906 }
10907 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10908 {
10909 int size = int_size_in_bytes (type);
10910 /* Variable sized types have size == -1 and are
10911 treated as if consisting entirely of ints.
10912 Pad to 16 byte boundary if needed. */
10913 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10914 && (cum->words % 2) != 0)
10915 cum->words++;
10916 /* For varargs, we can just go up by the size of the struct. */
10917 if (!named)
10918 cum->words += (size + 7) / 8;
10919 else
10920 {
10921 /* It is tempting to say int register count just goes up by
10922 sizeof(type)/8, but this is wrong in a case such as
10923 { int; double; int; } [powerpc alignment]. We have to
10924 grovel through the fields for these too. */
10925 cum->intoffset = 0;
10926 cum->floats_in_gpr = 0;
10927 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
10928 rs6000_darwin64_record_arg_advance_flush (cum,
10929 size * BITS_PER_UNIT, 1);
10930 }
10931 if (TARGET_DEBUG_ARG)
10932 {
10933 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
10934 cum->words, TYPE_ALIGN (type), size);
10935 fprintf (stderr,
10936 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
10937 cum->nargs_prototype, cum->prototype,
10938 GET_MODE_NAME (mode));
10939 }
10940 }
10941 else if (DEFAULT_ABI == ABI_V4)
10942 {
10943 if (abi_v4_pass_in_fpr (mode, named))
10944 {
10945 /* _Decimal128 must use an even/odd register pair. This assumes
10946 that the register number is odd when fregno is odd. */
10947 if (mode == TDmode && (cum->fregno % 2) == 1)
10948 cum->fregno++;
10949
10950 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
10951 <= FP_ARG_V4_MAX_REG)
10952 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
10953 else
10954 {
10955 cum->fregno = FP_ARG_V4_MAX_REG + 1;
10956 if (mode == DFmode || FLOAT128_IBM_P (mode)
10957 || mode == DDmode || mode == TDmode)
10958 cum->words += cum->words & 1;
10959 cum->words += rs6000_arg_size (mode, type);
10960 }
10961 }
10962 else
10963 {
10964 int n_words = rs6000_arg_size (mode, type);
10965 int gregno = cum->sysv_gregno;
10966
10967 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
10968 As does any other 2 word item such as complex int due to a
10969 historical mistake. */
10970 if (n_words == 2)
10971 gregno += (1 - gregno) & 1;
10972
10973 /* Multi-reg args are not split between registers and stack. */
10974 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10975 {
10976 /* Long long is aligned on the stack. So are other 2 word
10977 items such as complex int due to a historical mistake. */
10978 if (n_words == 2)
10979 cum->words += cum->words & 1;
10980 cum->words += n_words;
10981 }
10982
10983 /* Note: we keep accumulating gregno even after we have started
10984 spilling to the stack; this tells expand_builtin_saveregs
10985 that spilling has begun. */
10986 cum->sysv_gregno = gregno + n_words;
10987 }
10988
10989 if (TARGET_DEBUG_ARG)
10990 {
10991 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10992 cum->words, cum->fregno);
10993 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
10994 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
10995 fprintf (stderr, "mode = %4s, named = %d\n",
10996 GET_MODE_NAME (mode), named);
10997 }
10998 }
10999 else
11000 {
11001 int n_words = rs6000_arg_size (mode, type);
11002 int start_words = cum->words;
11003 int align_words = rs6000_parm_start (mode, type, start_words);
11004
11005 cum->words = align_words + n_words;
11006
11007 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
11008 {
11009 /* _Decimal128 must be passed in an even/odd float register pair.
11010 This assumes that the register number is odd when fregno is
11011 odd. */
11012 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11013 cum->fregno++;
11014 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11015 }
11016
11017 if (TARGET_DEBUG_ARG)
11018 {
11019 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11020 cum->words, cum->fregno);
11021 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11022 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11023 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11024 named, align_words - start_words, depth);
11025 }
11026 }
11027 }
11028
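/* Implement the TARGET_FUNCTION_ARG_ADVANCE hook. */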
11029 static void
11030 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11031 const_tree type, bool named)
11032 {
11033 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11034 0);
11035 }
11036
11037 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11038 structure between cum->intoffset and bitpos to integer registers. */
11039
11040 static void
11041 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11042 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11043 {
11044 machine_mode mode;
11045 unsigned int regno;
11046 unsigned int startbit, endbit;
11047 int this_regno, intregs, intoffset;
11048 rtx reg;
11049
11050 if (cum->intoffset == -1)
11051 return;
11052
11053 intoffset = cum->intoffset;
11054 cum->intoffset = -1;
11055
11056 /* If this is the trailing part of a word, try to only load that
11057 much into the register. Otherwise load the whole register. Note
11058 that in the latter case we may pick up unwanted bits. It's not a
11059 problem at the moment, but we may wish to revisit this. */
11060
11061 if (intoffset % BITS_PER_WORD != 0)
11062 {
11063 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11064 if (!int_mode_for_size (bits, 0).exists (&mode))
11065 {
11066 /* We couldn't find an appropriate mode, which happens,
11067 e.g., in packed structs when there are 3 bytes to load.
11068 Move intoffset back to the beginning of the word in this
11069 case. */
11070 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11071 mode = word_mode;
11072 }
11073 }
11074 else
11075 mode = word_mode;
11076
11077 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11078 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11079 intregs = (endbit - startbit) / BITS_PER_WORD;
11080 this_regno = cum->words + intoffset / BITS_PER_WORD;
11081
11082 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11083 cum->use_stack = 1;
11084
11085 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11086 if (intregs <= 0)
11087 return;
11088
11089 intoffset /= BITS_PER_UNIT;
11090 do
11091 {
11092 regno = GP_ARG_MIN_REG + this_regno;
11093 reg = gen_rtx_REG (mode, regno);
11094 rvec[(*k)++]
11095 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11096
11097 this_regno += 1;
11098 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
11099 mode = word_mode;
11100 intregs -= 1;
11101 }
11102 while (intregs > 0);
11103 }
11104
11105 /* Recursive workhorse for rs6000_darwin64_record_arg, below. */
11106
11107 static void
11108 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
11109 HOST_WIDE_INT startbitpos, rtx rvec[],
11110 int *k)
11111 {
11112 tree f;
11113
11114 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11115 if (TREE_CODE (f) == FIELD_DECL)
11116 {
11117 HOST_WIDE_INT bitpos = startbitpos;
11118 tree ftype = TREE_TYPE (f);
11119 machine_mode mode;
11120 if (ftype == error_mark_node)
11121 continue;
11122 mode = TYPE_MODE (ftype);
11123
11124 if (DECL_SIZE (f) != 0
11125 && tree_fits_uhwi_p (bit_position (f)))
11126 bitpos += int_bit_position (f);
11127
11128 /* ??? FIXME: else assume zero offset. */
11129
11130 if (TREE_CODE (ftype) == RECORD_TYPE)
11131 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
11132 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
11133 {
11134 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
11135 #if 0
11136 switch (mode)
11137 {
11138 case E_SCmode: mode = SFmode; break;
11139 case E_DCmode: mode = DFmode; break;
11140 case E_TCmode: mode = TFmode; break;
11141 default: break;
11142 }
11143 #endif
11144 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11145 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
11146 {
11147 gcc_assert (cum->fregno == FP_ARG_MAX_REG
11148 && (mode == TFmode || mode == TDmode));
11149 /* Long double or _Decimal128 split over regs and memory. */
11150 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
11151 cum->use_stack = 1;
11152 }
11153 rvec[(*k)++]
11154 = gen_rtx_EXPR_LIST (VOIDmode,
11155 gen_rtx_REG (mode, cum->fregno++),
11156 GEN_INT (bitpos / BITS_PER_UNIT));
11157 if (FLOAT128_2REG_P (mode))
11158 cum->fregno++;
11159 }
11160 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11161 {
11162 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11163 rvec[(*k)++]
11164 = gen_rtx_EXPR_LIST (VOIDmode,
11165 gen_rtx_REG (mode, cum->vregno++),
11166 GEN_INT (bitpos / BITS_PER_UNIT));
11167 }
11168 else if (cum->intoffset == -1)
11169 cum->intoffset = bitpos;
11170 }
11171 }
11172
11173 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
11174 the register(s) to be used for each field and subfield of a struct
11175 being passed by value, along with the offset of where the
11176 register's value may be found in the block. FP fields go in FP
11177 registers, vector fields go in vector registers, and everything
11178 else goes in int registers, packed as in memory.
11179
11180 This code is also used for function return values. RETVAL indicates
11181 whether this is the case.
11182
11183 Much of this is taken from the SPARC V9 port, which has a similar
11184 calling convention. */
11185
11186 static rtx
11187 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
11188 bool named, bool retval)
11189 {
11190 rtx rvec[FIRST_PSEUDO_REGISTER];
11191 int k = 1, kbase = 1;
11192 HOST_WIDE_INT typesize = int_size_in_bytes (type);
11193 /* This is a copy; modifications are not visible to our caller. */
11194 CUMULATIVE_ARGS copy_cum = *orig_cum;
11195 CUMULATIVE_ARGS *cum = &copy_cum;
11196
11197 /* Pad to 16 byte boundary if needed. */
11198 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11199 && (cum->words % 2) != 0)
11200 cum->words++;
11201
11202 cum->intoffset = 0;
11203 cum->use_stack = 0;
11204 cum->named = named;
11205
11206 /* Put entries into rvec[] for individual FP and vector fields, and
11207 for the chunks of memory that go in int regs. Note we start at
11208 element 1; 0 is reserved for an indication of using memory, and
11209 may or may not be filled in below. */
11210 rs6000_darwin64_record_arg_recurse (cum, type, /*startbitpos=*/ 0, rvec, &k);
11211 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
11212
11213 /* If any part of the struct went on the stack put all of it there.
11214 This hack is because the generic code for
11215 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
11216 parts of the struct are not at the beginning. */
11217 if (cum->use_stack)
11218 {
11219 if (retval)
11220 return NULL_RTX; /* doesn't go in registers at all */
11221 kbase = 0;
11222 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11223 }
11224 if (k > 1 || cum->use_stack)
11225 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
11226 else
11227 return NULL_RTX;
11228 }
11229
11230 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
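/* With -m32 -mpowerpc64 the GPRs are 64 bits wide but the ABI is the
   32-bit one, so a multi-word argument is described as a PARALLEL of
   SImode registers at 4-byte offsets rather than as one wide REG. */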
11231
11232 static rtx
11233 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
11234 int align_words)
11235 {
11236 int n_units;
11237 int i, k;
11238 rtx rvec[GP_ARG_NUM_REG + 1];
11239
11240 if (align_words >= GP_ARG_NUM_REG)
11241 return NULL_RTX;
11242
11243 n_units = rs6000_arg_size (mode, type);
11244
11245 /* Optimize the simple case where the arg fits in one gpr, except in
11246 the case of BLKmode due to assign_parms assuming that registers are
11247 BITS_PER_WORD wide. */
11248 if (n_units == 0
11249 || (n_units == 1 && mode != BLKmode))
11250 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11251
11252 k = 0;
11253 if (align_words + n_units > GP_ARG_NUM_REG)
11254 /* Not all of the arg fits in gprs. Say that it goes in memory too,
11255 using a magic NULL_RTX component.
11256 This is not strictly correct. Only some of the arg belongs in
11257 memory, not all of it. However, the normal scheme using
11258 function_arg_partial_nregs can result in unusual subregs, eg.
11259 (subreg:SI (reg:DF) 4), which are not handled well. The code to
11260 store the whole arg to memory is often more efficient than code
11261 to store pieces, and we know that space is available in the right
11262 place for the whole arg. */
11263 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11264
11265 i = 0;
11266 do
11267 {
11268 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
11269 rtx off = GEN_INT (i++ * 4);
11270 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11271 }
11272 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
11273
11274 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11275 }
11276
11277 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
11278 but must also be copied into the parameter save area starting at
11279 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
11280 to the GPRs and/or memory. Return the number of elements used. */
11281
11282 static int
11283 rs6000_psave_function_arg (machine_mode mode, const_tree type,
11284 int align_words, rtx *rvec)
11285 {
11286 int k = 0;
11287
11288 if (align_words < GP_ARG_NUM_REG)
11289 {
11290 int n_words = rs6000_arg_size (mode, type);
11291
11292 if (align_words + n_words > GP_ARG_NUM_REG
11293 || mode == BLKmode
11294 || (TARGET_32BIT && TARGET_POWERPC64))
11295 {
11296 /* If this is partially on the stack, then we only
11297 include the portion actually in registers here. */
11298 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11299 int i = 0;
11300
11301 if (align_words + n_words > GP_ARG_NUM_REG)
11302 {
11303 /* Not all of the arg fits in gprs. Say that it goes in memory
11304 too, using a magic NULL_RTX component. Also see comment in
11305 rs6000_mixed_function_arg for why the normal
11306 function_arg_partial_nregs scheme doesn't work in this case. */
11307 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11308 }
11309
11310 do
11311 {
11312 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11313 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
11314 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11315 }
11316 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11317 }
11318 else
11319 {
11320 /* The whole arg fits in gprs. */
11321 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11322 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
11323 }
11324 }
11325 else
11326 {
11327 /* It's entirely in memory. */
11328 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11329 }
11330
11331 return k;
11332 }
11333
11334 /* RVEC is a vector of K components of an argument of mode MODE.
11335 Construct the final function_arg return value from it. */
11336
11337 static rtx
11338 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
11339 {
11340 gcc_assert (k >= 1);
11341
11342 /* Avoid returning a PARALLEL in the trivial cases. */
11343 if (k == 1)
11344 {
11345 if (XEXP (rvec[0], 0) == NULL_RTX)
11346 return NULL_RTX;
11347
11348 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
11349 return XEXP (rvec[0], 0);
11350 }
11351
11352 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11353 }
11354
11355 /* Determine where to put an argument to a function.
11356 Value is zero to push the argument on the stack,
11357 or a hard register in which to store the argument.
11358
11359 MODE is the argument's machine mode.
11360 TYPE is the data type of the argument (as a tree).
11361 This is null for libcalls where that information may
11362 not be available.
11363 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11364 the preceding args and about the function being called. It is
11365 not modified in this routine.
11366 NAMED is nonzero if this argument is a named parameter
11367 (otherwise it is an extra parameter matching an ellipsis).
11368
11369 On RS/6000 the first eight words of non-FP are normally in registers
11370 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
11371 Under V.4, the first 8 FP args are in registers.
11372
11373 If this is floating-point and no prototype is specified, we use
11374 both an FP and integer register (or possibly FP reg and stack). Library
11375 functions (when CALL_LIBCALL is set) always have the proper types for args,
11376 so we can pass the FP value just in one register. emit_library_function
11377 doesn't support PARALLEL anyway.
11378
11379 Note that for args passed by reference, function_arg will be called
11380 with MODE and TYPE set to that of the pointer to the arg, not the arg
11381 itself. */
11382
11383 static rtx
11384 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
11385 const_tree type, bool named)
11386 {
11387 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11388 enum rs6000_abi abi = DEFAULT_ABI;
11389 machine_mode elt_mode;
11390 int n_elts;
11391
11392 /* Return a marker to indicate whether CR1 needs to set or clear the
11393 bit that V.4 uses to say fp args were passed in registers.
11394 Assume that we don't need the marker for software floating point,
11395 or compiler generated library calls. */
11396 if (mode == VOIDmode)
11397 {
11398 if (abi == ABI_V4
11399 && (cum->call_cookie & CALL_LIBCALL) == 0
11400 && (cum->stdarg
11401 || (cum->nargs_prototype < 0
11402 && (cum->prototype || TARGET_NO_PROTOTYPE)))
11403 && TARGET_HARD_FLOAT)
11404 return GEN_INT (cum->call_cookie
11405 | ((cum->fregno == FP_ARG_MIN_REG)
11406 ? CALL_V4_SET_FP_ARGS
11407 : CALL_V4_CLEAR_FP_ARGS));
11408
11409 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
11410 }
11411
11412 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11413
11414 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11415 {
11416 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval=*/ false);
11417 if (rslt != NULL_RTX)
11418 return rslt;
11419 /* Else fall through to usual handling. */
11420 }
11421
11422 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11423 {
11424 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11425 rtx r, off;
11426 int i, k = 0;
11427
11428 /* Do we also need to pass this argument in the parameter save area?
11429 Library support functions for IEEE 128-bit are assumed to not need the
11430 value passed both in GPRs and in vector registers. */
11431 if (TARGET_64BIT && !cum->prototype
11432 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11433 {
11434 int align_words = ROUND_UP (cum->words, 2);
11435 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11436 }
11437
11438 /* Describe where this argument goes in the vector registers. */
11439 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
11440 {
11441 r = gen_rtx_REG (elt_mode, cum->vregno + i);
11442 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11443 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11444 }
11445
11446 return rs6000_finish_function_arg (mode, rvec, k);
11447 }
11448 else if (TARGET_ALTIVEC_ABI
11449 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
11450 || (type && TREE_CODE (type) == VECTOR_TYPE
11451 && int_size_in_bytes (type) == 16)))
11452 {
11453 if (named || abi == ABI_V4)
11454 return NULL_RTX;
11455 else
11456 {
11457 /* Vector parameters to varargs functions under AIX or Darwin
11458 get passed in memory and possibly also in GPRs. */
11459 int align, align_words, n_words;
11460 machine_mode part_mode;
11461
11462 /* Vector parameters must be 16-byte aligned. In 32-bit
11463 mode this means we need to take into account the offset
11464 to the parameter save area. In 64-bit mode, they just
11465 have to start on an even word, since the parameter save
11466 area is 16-byte aligned. */
11467 if (TARGET_32BIT)
11468 align = -(rs6000_parm_offset () + cum->words) & 3;
11469 else
11470 align = cum->words & 1;
11471 align_words = cum->words + align;
11472
11473 /* Out of registers? Memory, then. */
11474 if (align_words >= GP_ARG_NUM_REG)
11475 return NULL_RTX;
11476
11477 if (TARGET_32BIT && TARGET_POWERPC64)
11478 return rs6000_mixed_function_arg (mode, type, align_words);
11479
11480 /* The vector value goes in GPRs. Only the part of the
11481 value in GPRs is reported here. */
11482 part_mode = mode;
11483 n_words = rs6000_arg_size (mode, type);
11484 if (align_words + n_words > GP_ARG_NUM_REG)
11485 /* Fortunately, there are only two possibilities, the value
11486 is either wholly in GPRs or half in GPRs and half not. */
11487 part_mode = DImode;
11488
11489 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
11490 }
11491 }
11492
11493 else if (abi == ABI_V4)
11494 {
11495 if (abi_v4_pass_in_fpr (mode, named))
11496 {
11497 /* _Decimal128 must use an even/odd register pair. This assumes
11498 that the register number is odd when fregno is odd. */
11499 if (mode == TDmode && (cum->fregno % 2) == 1)
11500 cum->fregno++;
11501
11502 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11503 <= FP_ARG_V4_MAX_REG)
11504 return gen_rtx_REG (mode, cum->fregno);
11505 else
11506 return NULL_RTX;
11507 }
11508 else
11509 {
11510 int n_words = rs6000_arg_size (mode, type);
11511 int gregno = cum->sysv_gregno;
11512
11513 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11514 As does any other 2 word item such as complex int due to a
11515 historical mistake. */
11516 if (n_words == 2)
11517 gregno += (1 - gregno) & 1;
11518
11519 /* Multi-reg args are not split between registers and stack. */
11520 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11521 return NULL_RTX;
11522
11523 if (TARGET_32BIT && TARGET_POWERPC64)
11524 return rs6000_mixed_function_arg (mode, type,
11525 gregno - GP_ARG_MIN_REG);
11526 return gen_rtx_REG (mode, gregno);
11527 }
11528 }
11529 else
11530 {
11531 int align_words = rs6000_parm_start (mode, type, cum->words);
11532
11533 /* _Decimal128 must be passed in an even/odd float register pair.
11534 This assumes that the register number is odd when fregno is odd. */
11535 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11536 cum->fregno++;
11537
11538 if (USE_FP_FOR_ARG_P (cum, elt_mode)
11539 && !(TARGET_AIX && !TARGET_ELF
11540 && type != NULL && AGGREGATE_TYPE_P (type)))
11541 {
11542 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11543 rtx r, off;
11544 int i, k = 0;
11545 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11546 int fpr_words;
11547
11548 /* Do we also need to pass this argument in the parameter
11549 save area? */
11550 if (type && (cum->nargs_prototype <= 0
11551 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11552 && TARGET_XL_COMPAT
11553 && align_words >= GP_ARG_NUM_REG)))
11554 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11555
11556 /* Describe where this argument goes in the fprs. */
11557 for (i = 0; i < n_elts
11558 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
11559 {
11560 /* Check if the argument is split over registers and memory.
11561 This can only ever happen for long double or _Decimal128;
11562 complex types are handled via split_complex_arg. */
11563 machine_mode fmode = elt_mode;
11564 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
11565 {
11566 gcc_assert (FLOAT128_2REG_P (fmode));
11567 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
11568 }
11569
11570 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
11571 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11572 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11573 }
11574
11575 /* If there were not enough FPRs to hold the argument, the rest
11576 usually goes into memory. However, if the current position
11577 is still within the register parameter area, a portion may
11578 actually have to go into GPRs.
11579
11580 Note that it may happen that the portion of the argument
11581 passed in the first "half" of the first GPR was already
11582 passed in the last FPR as well.
11583
11584 For unnamed arguments, we already set up GPRs to cover the
11585 whole argument in rs6000_psave_function_arg, so there is
11586 nothing further to do at this point. */
11587 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
11588 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
11589 && cum->nargs_prototype > 0)
11590 {
11591 static bool warned;
11592
11593 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11594 int n_words = rs6000_arg_size (mode, type);
11595
11596 align_words += fpr_words;
11597 n_words -= fpr_words;
11598
11599 do
11600 {
11601 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11602 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
11603 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11604 }
11605 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11606
11607 if (!warned && warn_psabi)
11608 {
11609 warned = true;
11610 inform (input_location,
11611 "the ABI of passing homogeneous %<float%> aggregates"
11612 " has changed in GCC 5");
11613 }
11614 }
11615
11616 return rs6000_finish_function_arg (mode, rvec, k);
11617 }
11618 else if (align_words < GP_ARG_NUM_REG)
11619 {
11620 if (TARGET_32BIT && TARGET_POWERPC64)
11621 return rs6000_mixed_function_arg (mode, type, align_words);
11622
11623 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11624 }
11625 else
11626 return NULL_RTX;
11627 }
11628 }
11629 \f
11630 /* For an arg passed partly in registers and partly in memory, this is
11631 the number of bytes passed in registers. For args passed entirely in
11632 registers or entirely in memory, zero. When an arg is described by a
11633 PARALLEL, perhaps using more than one register type, this function
11634 returns the number of bytes used by the first element of the PARALLEL. */
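/* For example, if an aggregate needs four words but only two argument
   words remain before GP_ARG_NUM_REG, the first two words go in GPRs and
   this returns 2 * (TARGET_32BIT ? 4 : 8) bytes. */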
11635
11636 static int
11637 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
11638 tree type, bool named)
11639 {
11640 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11641 bool passed_in_gprs = true;
11642 int ret = 0;
11643 int align_words;
11644 machine_mode elt_mode;
11645 int n_elts;
11646
11647 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11648
11649 if (DEFAULT_ABI == ABI_V4)
11650 return 0;
11651
11652 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11653 {
11654 /* If we are passing this arg in the fixed parameter save area (gprs or
11655 memory) as well as VRs, we do not use the partial bytes mechanism;
11656 instead, rs6000_function_arg will return a PARALLEL including a memory
11657 element as necessary. Library support functions for IEEE 128-bit are
11658 assumed to not need the value passed both in GPRs and in vector
11659 registers. */
11660 if (TARGET_64BIT && !cum->prototype
11661 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11662 return 0;
11663
11664 /* Otherwise, we pass in VRs only. Check for partial copies. */
11665 passed_in_gprs = false;
11666 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
11667 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
11668 }
11669
11670 /* In this complicated case we just disable the partial_nregs code. */
11671 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11672 return 0;
11673
11674 align_words = rs6000_parm_start (mode, type, cum->words);
11675
11676 if (USE_FP_FOR_ARG_P (cum, elt_mode)
11677 && !(TARGET_AIX && !TARGET_ELF
11678 && type != NULL && AGGREGATE_TYPE_P (type)))
11679 {
11680 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11681
11682 /* If we are passing this arg in the fixed parameter save area
11683 (gprs or memory) as well as FPRs, we do not use the partial
11684 bytes mechanism; instead, rs6000_function_arg will return a
11685 PARALLEL including a memory element as necessary. */
11686 if (type
11687 && (cum->nargs_prototype <= 0
11688 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11689 && TARGET_XL_COMPAT
11690 && align_words >= GP_ARG_NUM_REG)))
11691 return 0;
11692
11693 /* Otherwise, we pass in FPRs only. Check for partial copies. */
11694 passed_in_gprs = false;
11695 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
11696 {
11697 /* Compute number of bytes / words passed in FPRs. If there
11698 is still space available in the register parameter area
11699 *after* that amount, a part of the argument will be passed
11700 in GPRs. In that case, the total amount passed in any
11701 registers is equal to the amount that would have been passed
11702 in GPRs if everything were passed there, so we fall back to
11703 the GPR code below to compute the appropriate value. */
11704 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
11705 * MIN (8, GET_MODE_SIZE (elt_mode)));
11706 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
11707
11708 if (align_words + fpr_words < GP_ARG_NUM_REG)
11709 passed_in_gprs = true;
11710 else
11711 ret = fpr;
11712 }
11713 }
11714
11715 if (passed_in_gprs
11716 && align_words < GP_ARG_NUM_REG
11717 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
11718 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
11719
11720 if (ret != 0 && TARGET_DEBUG_ARG)
11721 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
11722
11723 return ret;
11724 }
11725 \f
11726 /* A C expression that indicates when an argument must be passed by
11727 reference. If nonzero for an argument, a copy of that argument is
11728 made in memory and a pointer to the argument is passed instead of
11729 the argument itself. The pointer is passed in whatever way is
11730 appropriate for passing a pointer to that type.
11731
11732 Under V.4, aggregates and long double are passed by reference.
11733
11734 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
11735 reference unless the AltiVec vector extension ABI is in force.
11736
11737 As an extension to all ABIs, variable sized types are passed by
11738 reference. */
11739
11740 static bool
11741 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
11742 machine_mode mode, const_tree type,
11743 bool named ATTRIBUTE_UNUSED)
11744 {
11745 if (!type)
11746 return 0;
11747
11748 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11749 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11750 {
11751 if (TARGET_DEBUG_ARG)
11752 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
11753 return 1;
11754 }
11755
11756 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
11757 {
11758 if (TARGET_DEBUG_ARG)
11759 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
11760 return 1;
11761 }
11762
11763 if (int_size_in_bytes (type) < 0)
11764 {
11765 if (TARGET_DEBUG_ARG)
11766 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
11767 return 1;
11768 }
11769
11770 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11771 modes only exist for GCC vector types if -maltivec. */
11772 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
11773 {
11774 if (TARGET_DEBUG_ARG)
11775 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
11776 return 1;
11777 }
11778
11779 /* Pass synthetic vectors in memory. */
11780 if (TREE_CODE (type) == VECTOR_TYPE
11781 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11782 {
11783 static bool warned_for_pass_big_vectors = false;
11784 if (TARGET_DEBUG_ARG)
11785 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
11786 if (!warned_for_pass_big_vectors)
11787 {
11788 warning (OPT_Wpsabi, "GCC vector passed by reference: "
11789 "non-standard ABI extension with no compatibility "
11790 "guarantee");
11791 warned_for_pass_big_vectors = true;
11792 }
11793 return 1;
11794 }
11795
11796 return 0;
11797 }
11798
11799 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
11800 already processed. Return true if the parameter must be passed
11801 (fully or partially) on the stack. */
11802
11803 static bool
11804 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
11805 {
11806 machine_mode mode;
11807 int unsignedp;
11808 rtx entry_parm;
11809
11810 /* Catch errors. */
11811 if (type == NULL || type == error_mark_node)
11812 return true;
11813
11814 /* Handle types with no storage requirement. */
11815 if (TYPE_MODE (type) == VOIDmode)
11816 return false;
11817
11818 /* Handle complex types. The two identical calls below are deliberate: each one advances ARGS_SO_FAR past one component of the pair. */
11819 if (TREE_CODE (type) == COMPLEX_TYPE)
11820 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
11821 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
11822
11823 /* Handle transparent aggregates. */
11824 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
11825 && TYPE_TRANSPARENT_AGGR (type))
11826 type = TREE_TYPE (first_field (type));
11827
11828 /* See if this arg was passed by invisible reference. */
11829 if (pass_by_reference (get_cumulative_args (args_so_far),
11830 TYPE_MODE (type), type, true))
11831 type = build_pointer_type (type);
11832
11833 /* Find mode as it is passed by the ABI. */
11834 unsignedp = TYPE_UNSIGNED (type);
11835 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
11836
11837 /* If we must pass in stack, we need a stack. */
11838 if (rs6000_must_pass_in_stack (mode, type))
11839 return true;
11840
11841 /* If there is no incoming register, we need a stack. */
11842 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
11843 if (entry_parm == NULL)
11844 return true;
11845
11846 /* Likewise if we need to pass both in registers and on the stack. */
11847 if (GET_CODE (entry_parm) == PARALLEL
11848 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
11849 return true;
11850
11851 /* Also true if we're partially in registers and partially not. */
11852 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
11853 return true;
11854
11855 /* Update info on where next arg arrives in registers. */
11856 rs6000_function_arg_advance (args_so_far, mode, type, true);
11857 return false;
11858 }
11859
11860 /* Return true if FUN has no prototype, has a variable argument
11861 list, or passes any parameter in memory. */
11862
11863 static bool
11864 rs6000_function_parms_need_stack (tree fun, bool incoming)
11865 {
11866 tree fntype, result;
11867 CUMULATIVE_ARGS args_so_far_v;
11868 cumulative_args_t args_so_far;
11869
11870 if (!fun)
11871 /* Must be a libcall, all of which only use reg parms. */
11872 return false;
11873
11874 fntype = fun;
11875 if (!TYPE_P (fun))
11876 fntype = TREE_TYPE (fun);
11877
11878 /* Varargs functions need the parameter save area. */
11879 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
11880 return true;
11881
11882 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
11883 args_so_far = pack_cumulative_args (&args_so_far_v);
11884
11885 /* When incoming, we will have been passed the function decl.
11886 It is necessary to use the decl to handle K&R style functions,
11887 where TYPE_ARG_TYPES may not be available. */
11888 if (incoming)
11889 {
11890 gcc_assert (DECL_P (fun));
11891 result = DECL_RESULT (fun);
11892 }
11893 else
11894 result = TREE_TYPE (fntype);
11895
11896 if (result && aggregate_value_p (result, fntype))
11897 {
11898 if (!TYPE_P (result))
11899 result = TREE_TYPE (result);
11900 result = build_pointer_type (result);
11901 rs6000_parm_needs_stack (args_so_far, result);
11902 }
11903
11904 if (incoming)
11905 {
11906 tree parm;
11907
11908 for (parm = DECL_ARGUMENTS (fun);
11909 parm && parm != void_list_node;
11910 parm = TREE_CHAIN (parm))
11911 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
11912 return true;
11913 }
11914 else
11915 {
11916 function_args_iterator args_iter;
11917 tree arg_type;
11918
11919 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
11920 if (rs6000_parm_needs_stack (args_so_far, arg_type))
11921 return true;
11922 }
11923
11924 return false;
11925 }
11926
11927 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
11928 usually a constant depending on the ABI (for AIX and Darwin, the eight
11929 GPR argument words). However, in the ELFv2 ABI the register parameter
11930 area is optional when calling a function that has a prototype in scope,
11931 has no variable argument list, and passes all parameters in registers. */
11932
11933 int
11934 rs6000_reg_parm_stack_space (tree fun, bool incoming)
11935 {
11936 int reg_parm_stack_space;
11937
11938 switch (DEFAULT_ABI)
11939 {
11940 default:
11941 reg_parm_stack_space = 0;
11942 break;
11943
11944 case ABI_AIX:
11945 case ABI_DARWIN:
11946 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11947 break;
11948
11949 case ABI_ELFv2:
11950 /* ??? Recomputing this every time is a bit expensive. Is there
11951 a place to cache this information? */
11952 if (rs6000_function_parms_need_stack (fun, incoming))
11953 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11954 else
11955 reg_parm_stack_space = 0;
11956 break;
11957 }
11958
11959 return reg_parm_stack_space;
11960 }
11961
11962 static void
11963 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
11964 {
11965 int i;
11966 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
11967
11968 if (nregs == 0)
11969 return;
11970
11971 for (i = 0; i < nregs; i++)
11972 {
11973 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
11974 if (reload_completed)
11975 {
11976 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
11977 tem = NULL_RTX;
11978 else
11979 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
11980 i * GET_MODE_SIZE (reg_mode));
11981 }
11982 else
11983 tem = replace_equiv_address (tem, XEXP (tem, 0));
11984
11985 gcc_assert (tem);
11986
11987 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
11988 }
11989 }
11990 \f
11991 /* Perform any actions needed for a function that is receiving a
11992 variable number of arguments.
11993
11994 CUM is as above.
11995
11996 MODE and TYPE are the mode and type of the current parameter.
11997
11998 PRETEND_SIZE is a variable that should be set to the amount of stack
11999 that must be pushed by the prolog to pretend that our caller pushed
12000 it.
12001
12002 Normally, this macro will push all remaining incoming registers on the
12003 stack and set PRETEND_SIZE to the length of the registers pushed. */
12004
12005 static void
12006 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12007 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12008 int no_rtl)
12009 {
12010 CUMULATIVE_ARGS next_cum;
12011 int reg_size = TARGET_32BIT ? 4 : 8;
12012 rtx save_area = NULL_RTX, mem;
12013 int first_reg_offset;
12014 alias_set_type set;
12015
12016 /* Skip the last named argument. */
12017 next_cum = *get_cumulative_args (cum);
12018 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12019
12020 if (DEFAULT_ABI == ABI_V4)
12021 {
12022 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12023
12024 if (! no_rtl)
12025 {
12026 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12027 HOST_WIDE_INT offset = 0;
12028
12029 /* Try to optimize the size of the varargs save area.
12030 The ABI requires that ap.reg_save_area is doubleword
12031 aligned, but we don't need to allocate space for all
12032 the bytes, only those in which we will actually save
12033 anything. */
12034 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12035 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12036 if (TARGET_HARD_FLOAT
12037 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12038 && cfun->va_list_fpr_size)
12039 {
12040 if (gpr_reg_num)
12041 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12042 * UNITS_PER_FP_WORD;
12043 if (cfun->va_list_fpr_size
12044 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12045 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12046 else
12047 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12048 * UNITS_PER_FP_WORD;
12049 }
12050 if (gpr_reg_num)
12051 {
12052 offset = -((first_reg_offset * reg_size) & ~7);
12053 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12054 {
12055 gpr_reg_num = cfun->va_list_gpr_size;
12056 if (reg_size == 4 && (first_reg_offset & 1))
12057 gpr_reg_num++;
12058 }
12059 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12060 }
12061 else if (fpr_size)
12062 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12063 * UNITS_PER_FP_WORD
12064 - (int) (GP_ARG_NUM_REG * reg_size);
12065
12066 if (gpr_size + fpr_size)
12067 {
12068 rtx reg_save_area
12069 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12070 gcc_assert (MEM_P (reg_save_area));
12071 reg_save_area = XEXP (reg_save_area, 0);
12072 if (GET_CODE (reg_save_area) == PLUS)
12073 {
12074 gcc_assert (XEXP (reg_save_area, 0)
12075 == virtual_stack_vars_rtx);
12076 gcc_assert (CONST_INT_P (XEXP (reg_save_area, 1)));
12077 offset += INTVAL (XEXP (reg_save_area, 1));
12078 }
12079 else
12080 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12081 }
12082
12083 cfun->machine->varargs_save_offset = offset;
12084 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
12085 }
12086 }
12087 else
12088 {
12089 first_reg_offset = next_cum.words;
12090 save_area = crtl->args.internal_arg_pointer;
12091
12092 if (targetm.calls.must_pass_in_stack (mode, type))
12093 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
12094 }
12095
12096 set = get_varargs_alias_set ();
12097 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
12098 && cfun->va_list_gpr_size)
12099 {
12100 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
12101
12102 if (va_list_gpr_counter_field)
12103 /* V4 va_list_gpr_size counts number of registers needed. */
12104 n_gpr = cfun->va_list_gpr_size;
12105 else
12106 /* char * va_list instead counts number of bytes needed. */
12107 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
12108
12109 if (nregs > n_gpr)
12110 nregs = n_gpr;
12111
12112 mem = gen_rtx_MEM (BLKmode,
12113 plus_constant (Pmode, save_area,
12114 first_reg_offset * reg_size));
12115 MEM_NOTRAP_P (mem) = 1;
12116 set_mem_alias_set (mem, set);
12117 set_mem_align (mem, BITS_PER_WORD);
12118
12119 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
12120 nregs);
12121 }
12122
12123 /* Save FP registers if needed. */
12124 if (DEFAULT_ABI == ABI_V4
12125 && TARGET_HARD_FLOAT
12126 && ! no_rtl
12127 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12128 && cfun->va_list_fpr_size)
12129 {
12130 int fregno = next_cum.fregno, nregs;
12131 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
12132 rtx lab = gen_label_rtx ();
12133 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
12134 * UNITS_PER_FP_WORD);
12135
12136 emit_jump_insn
12137 (gen_rtx_SET (pc_rtx,
12138 gen_rtx_IF_THEN_ELSE (VOIDmode,
12139 gen_rtx_NE (VOIDmode, cr1,
12140 const0_rtx),
12141 gen_rtx_LABEL_REF (VOIDmode, lab),
12142 pc_rtx)));
12143
12144 for (nregs = 0;
12145 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
12146 fregno++, off += UNITS_PER_FP_WORD, nregs++)
12147 {
12148 mem = gen_rtx_MEM (TARGET_HARD_FLOAT ? DFmode : SFmode,
12149 plus_constant (Pmode, save_area, off));
12150 MEM_NOTRAP_P (mem) = 1;
12151 set_mem_alias_set (mem, set);
12152 set_mem_align (mem, GET_MODE_ALIGNMENT (TARGET_HARD_FLOAT
12153 ? DFmode : SFmode));
12154 emit_move_insn (mem, gen_rtx_REG (TARGET_HARD_FLOAT
12155 ? DFmode : SFmode, fregno));
12156 }
12157
12158 emit_label (lab);
12159 }
12160 }
12161
12162 /* Create the va_list data type. */
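/* For ABI_V4 the record built below matches the layout the SVR4 PowerPC
   ABI specifies, equivalent to

     typedef struct __va_list_tag
     {
       unsigned char gpr;           -- index of the next GPR to use, 0..8
       unsigned char fpr;           -- index of the next FPR to use, 0..8
       unsigned short reserved;     -- padding
       void *overflow_arg_area;     -- arguments passed on the stack
       void *reg_save_area;         -- base of the register save area
     } __va_list[1];

   For the other ABIs va_list is simply a char pointer. */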
12163
12164 static tree
12165 rs6000_build_builtin_va_list (void)
12166 {
12167 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
12168
12169 /* For AIX, prefer 'char *' because that's what the system
12170 header files like. */
12171 if (DEFAULT_ABI != ABI_V4)
12172 return build_pointer_type (char_type_node);
12173
12174 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
12175 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
12176 get_identifier ("__va_list_tag"), record);
12177
12178 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
12179 unsigned_char_type_node);
12180 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
12181 unsigned_char_type_node);
12182 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
12183 every user file. */
12184 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12185 get_identifier ("reserved"), short_unsigned_type_node);
12186 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12187 get_identifier ("overflow_arg_area"),
12188 ptr_type_node);
12189 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12190 get_identifier ("reg_save_area"),
12191 ptr_type_node);
12192
12193 va_list_gpr_counter_field = f_gpr;
12194 va_list_fpr_counter_field = f_fpr;
12195
12196 DECL_FIELD_CONTEXT (f_gpr) = record;
12197 DECL_FIELD_CONTEXT (f_fpr) = record;
12198 DECL_FIELD_CONTEXT (f_res) = record;
12199 DECL_FIELD_CONTEXT (f_ovf) = record;
12200 DECL_FIELD_CONTEXT (f_sav) = record;
12201
12202 TYPE_STUB_DECL (record) = type_decl;
12203 TYPE_NAME (record) = type_decl;
12204 TYPE_FIELDS (record) = f_gpr;
12205 DECL_CHAIN (f_gpr) = f_fpr;
12206 DECL_CHAIN (f_fpr) = f_res;
12207 DECL_CHAIN (f_res) = f_ovf;
12208 DECL_CHAIN (f_ovf) = f_sav;
12209
12210 layout_type (record);
12211
12212 /* The correct type is an array type of one element. */
12213 return build_array_type (record, build_index_type (size_zero_node));
12214 }
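
/* For reference, the record built above corresponds to the va_list type
   mandated by the SVR4 PowerPC ABI.  A user-level sketch (illustrative
   only; the typedef name is hypothetical, but the field names match the
   FIELD_DECLs created above):

     typedef struct __va_list_tag
     {
       unsigned char gpr;	  // index of the next GP register to use
       unsigned char fpr;	  // index of the next FP register to use
       unsigned short reserved;	  // padding, named to appease -Wpadded
       void *overflow_arg_area;	  // next argument passed on the stack
       void *reg_save_area;	  // start of the register save block
     } va_list_sketch[1];  */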
12215
12216 /* Implement va_start. */
12217
12218 static void
12219 rs6000_va_start (tree valist, rtx nextarg)
12220 {
12221 HOST_WIDE_INT words, n_gpr, n_fpr;
12222 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12223 tree gpr, fpr, ovf, sav, t;
12224
12225 /* Only SVR4 needs something special. */
12226 if (DEFAULT_ABI != ABI_V4)
12227 {
12228 std_expand_builtin_va_start (valist, nextarg);
12229 return;
12230 }
12231
12232 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12233 f_fpr = DECL_CHAIN (f_gpr);
12234 f_res = DECL_CHAIN (f_fpr);
12235 f_ovf = DECL_CHAIN (f_res);
12236 f_sav = DECL_CHAIN (f_ovf);
12237
12238 valist = build_simple_mem_ref (valist);
12239 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12240 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12241 f_fpr, NULL_TREE);
12242 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12243 f_ovf, NULL_TREE);
12244 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12245 f_sav, NULL_TREE);
12246
12247 /* Count number of gp and fp argument registers used. */
12248 words = crtl->args.info.words;
12249 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
12250 GP_ARG_NUM_REG);
12251 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
12252 FP_ARG_NUM_REG);
12253
12254 if (TARGET_DEBUG_ARG)
12255 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
12256 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
12257 words, n_gpr, n_fpr);
12258
12259 if (cfun->va_list_gpr_size)
12260 {
12261 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12262 build_int_cst (NULL_TREE, n_gpr));
12263 TREE_SIDE_EFFECTS (t) = 1;
12264 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12265 }
12266
12267 if (cfun->va_list_fpr_size)
12268 {
12269 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12270 build_int_cst (NULL_TREE, n_fpr));
12271 TREE_SIDE_EFFECTS (t) = 1;
12272 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12273
12274 #ifdef HAVE_AS_GNU_ATTRIBUTE
12275 if (call_ABI_of_interest (cfun->decl))
12276 rs6000_passes_float = true;
12277 #endif
12278 }
12279
12280 /* Find the overflow area. */
12281 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
12282 if (words != 0)
12283 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
12284 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12285 TREE_SIDE_EFFECTS (t) = 1;
12286 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12287
12288 /* If there were no va_arg invocations, don't set up the register
12289 save area. */
12290 if (!cfun->va_list_gpr_size
12291 && !cfun->va_list_fpr_size
12292 && n_gpr < GP_ARG_NUM_REG
12293 && n_fpr < FP_ARG_V4_MAX_REG)
12294 return;
12295
12296 /* Find the register save area. */
12297 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
12298 if (cfun->machine->varargs_save_offset)
12299 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
12300 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12301 TREE_SIDE_EFFECTS (t) = 1;
12302 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12303 }
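
/* In effect, the V4 expansion above behaves like this user-level sketch
   (AP stands for the va_list object; the names are illustrative):

     ap->gpr = n_gpr;	// GP argument registers already consumed
     ap->fpr = n_fpr;	// FP argument registers already consumed
     ap->overflow_arg_area = incoming_arg_ptr + words * MIN_UNITS_PER_WORD;
     ap->reg_save_area = frame_base + cfun->machine->varargs_save_offset;  */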
12304
12305 /* Implement va_arg. */
12306
12307 static tree
12308 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12309 gimple_seq *post_p)
12310 {
12311 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12312 tree gpr, fpr, ovf, sav, reg, t, u;
12313 int size, rsize, n_reg, sav_ofs, sav_scale;
12314 tree lab_false, lab_over, addr;
12315 int align;
12316 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
12317 int regalign = 0;
12318 gimple *stmt;
12319
12320 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12321 {
12322 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
12323 return build_va_arg_indirect_ref (t);
12324 }
12325
12326 /* We need to deal with the fact that the Darwin ppc64 ABI is defined by an
12327 earlier version of GCC, with the property that it always applied alignment
12328 adjustments to the va-args (even for zero-sized types). The cheapest way
12329 to deal with this is to replicate the effect of the part of
12330 std_gimplify_va_arg_expr that carries out the align adjust, for the case
12331 of relevance.
12332 We don't need to check for pass-by-reference because of the test above.
12333 We can return a simplified answer, since we know there's no offset to add. */
12334
12335 if (((TARGET_MACHO
12336 && rs6000_darwin64_abi)
12337 || DEFAULT_ABI == ABI_ELFv2
12338 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
12339 && integer_zerop (TYPE_SIZE (type)))
12340 {
12341 unsigned HOST_WIDE_INT align, boundary;
12342 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
12343 align = PARM_BOUNDARY / BITS_PER_UNIT;
12344 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
12345 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
12346 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
12347 boundary /= BITS_PER_UNIT;
12348 if (boundary > align)
12349 {
12350 tree t;
12351 /* This updates arg ptr by the amount that would be necessary
12352 to align the zero-sized (but not zero-alignment) item. */
12353 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12354 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
12355 gimplify_and_add (t, pre_p);
12356
12357 t = fold_convert (sizetype, valist_tmp);
12358 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12359 fold_convert (TREE_TYPE (valist),
12360 fold_build2 (BIT_AND_EXPR, sizetype, t,
12361 size_int (-boundary))));
12362 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
12363 gimplify_and_add (t, pre_p);
12364 }
12365 /* Since it is zero-sized there's no increment for the item itself. */
12366 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
12367 return build_va_arg_indirect_ref (valist_tmp);
12368 }
12369
12370 if (DEFAULT_ABI != ABI_V4)
12371 {
12372 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
12373 {
12374 tree elem_type = TREE_TYPE (type);
12375 machine_mode elem_mode = TYPE_MODE (elem_type);
12376 int elem_size = GET_MODE_SIZE (elem_mode);
12377
12378 if (elem_size < UNITS_PER_WORD)
12379 {
12380 tree real_part, imag_part;
12381 gimple_seq post = NULL;
12382
12383 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12384 &post);
12385 /* Copy the value into a temporary, lest the formal temporary
12386 be reused out from under us. */
12387 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
12388 gimple_seq_add_seq (pre_p, post);
12389
12390 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12391 post_p);
12392
12393 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
12394 }
12395 }
12396
12397 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
12398 }
12399
12400 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12401 f_fpr = DECL_CHAIN (f_gpr);
12402 f_res = DECL_CHAIN (f_fpr);
12403 f_ovf = DECL_CHAIN (f_res);
12404 f_sav = DECL_CHAIN (f_ovf);
12405
12406 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12407 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12408 f_fpr, NULL_TREE);
12409 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12410 f_ovf, NULL_TREE);
12411 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12412 f_sav, NULL_TREE);
12413
12414 size = int_size_in_bytes (type);
12415 rsize = (size + 3) / 4;
12416 int pad = 4 * rsize - size;
12417 align = 1;
12418
12419 machine_mode mode = TYPE_MODE (type);
12420 if (abi_v4_pass_in_fpr (mode, false))
12421 {
12422 /* FP args go in FP registers, if present. */
12423 reg = fpr;
12424 n_reg = (size + 7) / 8;
12425 sav_ofs = (TARGET_HARD_FLOAT ? 8 : 4) * 4;
12426 sav_scale = (TARGET_HARD_FLOAT ? 8 : 4);
12427 if (mode != SFmode && mode != SDmode)
12428 align = 8;
12429 }
12430 else
12431 {
12432 /* Otherwise into GP registers. */
12433 reg = gpr;
12434 n_reg = rsize;
12435 sav_ofs = 0;
12436 sav_scale = 4;
12437 if (n_reg == 2)
12438 align = 8;
12439 }
12440
12441 /* Pull the value out of the saved registers.... */
12442
12443 lab_over = NULL;
12444 addr = create_tmp_var (ptr_type_node, "addr");
12445
12446 /* AltiVec vectors never go in registers when -mabi=altivec. */
12447 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12448 align = 16;
12449 else
12450 {
12451 lab_false = create_artificial_label (input_location);
12452 lab_over = create_artificial_label (input_location);
12453
12454 /* Long long is aligned in the registers, as is any other two-GPR
12455 item such as complex int, due to a historical mistake. */
12456 u = reg;
12457 if (n_reg == 2 && reg == gpr)
12458 {
12459 regalign = 1;
12460 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12461 build_int_cst (TREE_TYPE (reg), n_reg - 1));
12462 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
12463 unshare_expr (reg), u);
12464 }
12465 /* _Decimal128 is passed in even/odd fpr pairs; the stored
12466 reg number is 0 for f1, so we want to make it odd. */
12467 else if (reg == fpr && mode == TDmode)
12468 {
12469 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12470 build_int_cst (TREE_TYPE (reg), 1));
12471 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
12472 }
12473
12474 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
12475 t = build2 (GE_EXPR, boolean_type_node, u, t);
12476 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12477 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12478 gimplify_and_add (t, pre_p);
12479
12480 t = sav;
12481 if (sav_ofs)
12482 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12483
12484 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12485 build_int_cst (TREE_TYPE (reg), n_reg));
12486 u = fold_convert (sizetype, u);
12487 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
12488 t = fold_build_pointer_plus (t, u);
12489
12490 /* _Decimal32 varargs are located in the second word of the 64-bit
12491 FP register for 32-bit binaries. */
12492 if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
12493 t = fold_build_pointer_plus_hwi (t, size);
12494
12495 /* Args are passed right-aligned. */
12496 if (BYTES_BIG_ENDIAN)
12497 t = fold_build_pointer_plus_hwi (t, pad);
12498
12499 gimplify_assign (addr, t, pre_p);
12500
12501 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12502
12503 stmt = gimple_build_label (lab_false);
12504 gimple_seq_add_stmt (pre_p, stmt);
12505
12506 if ((n_reg == 2 && !regalign) || n_reg > 2)
12507 {
12508 /* Ensure that we don't find any more args in regs.
12509 Alignment has already been taken care of for the special cases. */
12510 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
12511 }
12512 }
12513
12514 /* ... otherwise out of the overflow area. */
12515
12516 /* Care for on-stack alignment if needed. */
12517 t = ovf;
12518 if (align != 1)
12519 {
12520 t = fold_build_pointer_plus_hwi (t, align - 1);
12521 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12522 build_int_cst (TREE_TYPE (t), -align));
12523 }
12524
12525 /* Args are passed right-aligned. */
12526 if (BYTES_BIG_ENDIAN)
12527 t = fold_build_pointer_plus_hwi (t, pad);
12528
12529 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12530
12531 gimplify_assign (unshare_expr (addr), t, pre_p);
12532
12533 t = fold_build_pointer_plus_hwi (t, size);
12534 gimplify_assign (unshare_expr (ovf), t, pre_p);
12535
12536 if (lab_over)
12537 {
12538 stmt = gimple_build_label (lab_over);
12539 gimple_seq_add_stmt (pre_p, stmt);
12540 }
12541
12542 if (STRICT_ALIGNMENT
12543 && (TYPE_ALIGN (type)
12544 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
12545 {
12546 /* The value (of type complex double, for example) may not be
12547 aligned in memory in the saved registers, so copy via a
12548 temporary. (This is the same code as used for SPARC.) */
12549 tree tmp = create_tmp_var (type, "va_arg_tmp");
12550 tree dest_addr = build_fold_addr_expr (tmp);
12551
12552 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
12553 3, dest_addr, addr, size_int (rsize * 4));
12554 TREE_ADDRESSABLE (tmp) = 1;
12555
12556 gimplify_and_add (copy, pre_p);
12557 addr = dest_addr;
12558 }
12559
12560 addr = fold_convert (ptrtype, addr);
12561 return build_va_arg_indirect_ref (addr);
12562 }
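
/* The V4 register/overflow split above can be summarized by this sketch
   (illustrative pseudo-C, not the emitted GIMPLE; 8 is the number of
   argument registers of each kind):

     if (*reg + n_reg <= 8)
       {
	 // Still fits in the register save area.
	 addr = sav + sav_ofs + *reg * sav_scale;
	 *reg += n_reg;
       }
     else
       {
	 *reg = 8;			// no further register arguments
	 addr = align_up (ovf, align);	// take it from the overflow area
	 ovf = addr + size;
       }
     return *(TYPE *) addr;  */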
12563
12564 /* Builtins. */
12565
12566 static void
12567 def_builtin (const char *name, tree type, enum rs6000_builtins code)
12568 {
12569 tree t;
12570 unsigned classify = rs6000_builtin_info[(int)code].attr;
12571 const char *attr_string = "";
12572
12573 gcc_assert (name != NULL);
12574 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT - 1));
12575
12576 if (rs6000_builtin_decls[(int)code])
12577 fatal_error (input_location,
12578 "internal error: builtin function %qs already processed",
12579 name);
12580
12581 rs6000_builtin_decls[(int)code] = t =
12582 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
12583
12584 /* Set any special attributes. */
12585 if ((classify & RS6000_BTC_CONST) != 0)
12586 {
12587 /* const function, function only depends on the inputs. */
12588 TREE_READONLY (t) = 1;
12589 TREE_NOTHROW (t) = 1;
12590 attr_string = ", const";
12591 }
12592 else if ((classify & RS6000_BTC_PURE) != 0)
12593 {
12594 /* pure function, function can read global memory, but does not set any
12595 external state. */
12596 DECL_PURE_P (t) = 1;
12597 TREE_NOTHROW (t) = 1;
12598 attr_string = ", pure";
12599 }
12600 else if ((classify & RS6000_BTC_FP) != 0)
12601 {
12602 /* Function is a math function. If rounding mode is on, then treat the
12603 function as not reading global memory, but it can have arbitrary side
12604 effects. If it is off, then assume the function is a const function.
12605 This mimics the ATTR_MATHFN_FPROUNDING attribute in
12606 builtin-attribute.def that is used for the math functions. */
12607 TREE_NOTHROW (t) = 1;
12608 if (flag_rounding_math)
12609 {
12610 DECL_PURE_P (t) = 1;
12611 DECL_IS_NOVOPS (t) = 1;
12612 attr_string = ", fp, pure";
12613 }
12614 else
12615 {
12616 TREE_READONLY (t) = 1;
12617 attr_string = ", fp, const";
12618 }
12619 }
12620 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
12621 gcc_unreachable ();
12622
12623 if (TARGET_DEBUG_BUILTIN)
12624 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
12625 (int)code, name, attr_string);
12626 }
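
/* A hypothetical call, showing the shape of the interface (the name,
   type node and enumerator below are illustrative, not entries taken
   from rs6000-builtin.def):

     def_builtin ("__builtin_altivec_foo", v4si_ftype_v4si_v4si,
		  ALTIVEC_BUILTIN_FOO);  */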
12627
12628 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
12629
12630 #undef RS6000_BUILTIN_0
12631 #undef RS6000_BUILTIN_1
12632 #undef RS6000_BUILTIN_2
12633 #undef RS6000_BUILTIN_3
12634 #undef RS6000_BUILTIN_A
12635 #undef RS6000_BUILTIN_D
12636 #undef RS6000_BUILTIN_H
12637 #undef RS6000_BUILTIN_P
12638 #undef RS6000_BUILTIN_X
12639
12640 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12641 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12642 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12643 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
12644 { MASK, ICODE, NAME, ENUM },
12645
12646 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12647 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12648 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12649 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12650 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12651
12652 static const struct builtin_description bdesc_3arg[] =
12653 {
12654 #include "rs6000-builtin.def"
12655 };
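
/* The table above is filled in by the X-macro technique: each
   RS6000_BUILTIN_3 entry in rs6000-builtin.def expands to one
   { MASK, ICODE, NAME, ENUM } initializer while every other
   RS6000_BUILTIN_* entry expands to nothing.  A hypothetical .def line

     RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_FOO, "__builtin_altivec_foo",
		       RS6000_BTM_ALTIVEC, RS6000_BTC_TERNARY,
		       CODE_FOR_foo_v4si)

   would therefore contribute the row { RS6000_BTM_ALTIVEC,
   CODE_FOR_foo_v4si, "__builtin_altivec_foo", ALTIVEC_BUILTIN_FOO }.
   The same trick builds the other bdesc_* tables below.  */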
12656
12657 /* DST operations: void foo (void *, const int, const char). */
12658
12659 #undef RS6000_BUILTIN_0
12660 #undef RS6000_BUILTIN_1
12661 #undef RS6000_BUILTIN_2
12662 #undef RS6000_BUILTIN_3
12663 #undef RS6000_BUILTIN_A
12664 #undef RS6000_BUILTIN_D
12665 #undef RS6000_BUILTIN_H
12666 #undef RS6000_BUILTIN_P
12667 #undef RS6000_BUILTIN_X
12668
12669 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12670 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12671 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12672 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12673 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12674 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
12675 { MASK, ICODE, NAME, ENUM },
12676
12677 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12678 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12679 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12680
12681 static const struct builtin_description bdesc_dst[] =
12682 {
12683 #include "rs6000-builtin.def"
12684 };
12685
12686 /* Simple binary operations: VECc = foo (VECa, VECb). */
12687
12688 #undef RS6000_BUILTIN_0
12689 #undef RS6000_BUILTIN_1
12690 #undef RS6000_BUILTIN_2
12691 #undef RS6000_BUILTIN_3
12692 #undef RS6000_BUILTIN_A
12693 #undef RS6000_BUILTIN_D
12694 #undef RS6000_BUILTIN_H
12695 #undef RS6000_BUILTIN_P
12696 #undef RS6000_BUILTIN_X
12697
12698 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12699 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12700 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
12701 { MASK, ICODE, NAME, ENUM },
12702
12703 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12704 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12705 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12706 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12707 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12708 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12709
12710 static const struct builtin_description bdesc_2arg[] =
12711 {
12712 #include "rs6000-builtin.def"
12713 };
12714
12715 #undef RS6000_BUILTIN_0
12716 #undef RS6000_BUILTIN_1
12717 #undef RS6000_BUILTIN_2
12718 #undef RS6000_BUILTIN_3
12719 #undef RS6000_BUILTIN_A
12720 #undef RS6000_BUILTIN_D
12721 #undef RS6000_BUILTIN_H
12722 #undef RS6000_BUILTIN_P
12723 #undef RS6000_BUILTIN_X
12724
12725 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12726 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12727 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12728 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12729 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12730 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12731 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12732 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
12733 { MASK, ICODE, NAME, ENUM },
12734
12735 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12736
12737 /* AltiVec predicates. */
12738
12739 static const struct builtin_description bdesc_altivec_preds[] =
12740 {
12741 #include "rs6000-builtin.def"
12742 };
12743
12744 /* ABS* operations. */
12745
12746 #undef RS6000_BUILTIN_0
12747 #undef RS6000_BUILTIN_1
12748 #undef RS6000_BUILTIN_2
12749 #undef RS6000_BUILTIN_3
12750 #undef RS6000_BUILTIN_A
12751 #undef RS6000_BUILTIN_D
12752 #undef RS6000_BUILTIN_H
12753 #undef RS6000_BUILTIN_P
12754 #undef RS6000_BUILTIN_X
12755
12756 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12757 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12758 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12759 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12760 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
12761 { MASK, ICODE, NAME, ENUM },
12762
12763 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12764 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12765 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12766 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12767
12768 static const struct builtin_description bdesc_abs[] =
12769 {
12770 #include "rs6000-builtin.def"
12771 };
12772
12773 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
12774 foo (VECa). */
12775
12776 #undef RS6000_BUILTIN_0
12777 #undef RS6000_BUILTIN_1
12778 #undef RS6000_BUILTIN_2
12779 #undef RS6000_BUILTIN_3
12780 #undef RS6000_BUILTIN_A
12781 #undef RS6000_BUILTIN_D
12782 #undef RS6000_BUILTIN_H
12783 #undef RS6000_BUILTIN_P
12784 #undef RS6000_BUILTIN_X
12785
12786 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12787 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
12788 { MASK, ICODE, NAME, ENUM },
12789
12790 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12791 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12792 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12793 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12794 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12795 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12796 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12797
12798 static const struct builtin_description bdesc_1arg[] =
12799 {
12800 #include "rs6000-builtin.def"
12801 };
12802
12803 /* Simple no-argument operations: result = __builtin_darn_32 (). */
12804
12805 #undef RS6000_BUILTIN_0
12806 #undef RS6000_BUILTIN_1
12807 #undef RS6000_BUILTIN_2
12808 #undef RS6000_BUILTIN_3
12809 #undef RS6000_BUILTIN_A
12810 #undef RS6000_BUILTIN_D
12811 #undef RS6000_BUILTIN_H
12812 #undef RS6000_BUILTIN_P
12813 #undef RS6000_BUILTIN_X
12814
12815 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
12816 { MASK, ICODE, NAME, ENUM },
12817
12818 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12819 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12820 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12821 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12822 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12823 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12824 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12825 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12826
12827 static const struct builtin_description bdesc_0arg[] =
12828 {
12829 #include "rs6000-builtin.def"
12830 };
12831
12832 /* HTM builtins. */
12833 #undef RS6000_BUILTIN_0
12834 #undef RS6000_BUILTIN_1
12835 #undef RS6000_BUILTIN_2
12836 #undef RS6000_BUILTIN_3
12837 #undef RS6000_BUILTIN_A
12838 #undef RS6000_BUILTIN_D
12839 #undef RS6000_BUILTIN_H
12840 #undef RS6000_BUILTIN_P
12841 #undef RS6000_BUILTIN_X
12842
12843 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12844 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12845 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12846 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12847 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12848 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12849 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
12850 { MASK, ICODE, NAME, ENUM },
12851
12852 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12853 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12854
12855 static const struct builtin_description bdesc_htm[] =
12856 {
12857 #include "rs6000-builtin.def"
12858 };
12859
12860 #undef RS6000_BUILTIN_0
12861 #undef RS6000_BUILTIN_1
12862 #undef RS6000_BUILTIN_2
12863 #undef RS6000_BUILTIN_3
12864 #undef RS6000_BUILTIN_A
12865 #undef RS6000_BUILTIN_D
12866 #undef RS6000_BUILTIN_H
12867 #undef RS6000_BUILTIN_P
12868
12869 /* Return true if a builtin function is overloaded. */
12870 bool
12871 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
12872 {
12873 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
12874 }
12875
12876 const char *
12877 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
12878 {
12879 return rs6000_builtin_info[(int)fncode].name;
12880 }
12881
12882 /* Expand a builtin that takes no arguments, storing the result in TARGET. */
12883 static rtx
12884 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
12885 {
12886 rtx pat;
12887 machine_mode tmode = insn_data[icode].operand[0].mode;
12888
12889 if (icode == CODE_FOR_nothing)
12890 /* Builtin not supported on this processor. */
12891 return 0;
12892
12893 if (icode == CODE_FOR_rs6000_mffsl
12894 && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
12895 {
12896 error ("%<__builtin_mffsl%> not supported with %<-msoft-float%>");
12897 return const0_rtx;
12898 }
12899
12900 if (target == 0
12901 || GET_MODE (target) != tmode
12902 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12903 target = gen_reg_rtx (tmode);
12904
12905 pat = GEN_FCN (icode) (target);
12906 if (! pat)
12907 return 0;
12908 emit_insn (pat);
12909
12910 return target;
12911 }
12912
12913
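/* Expand a call to __builtin_mtfsf, which copies a floating-point value into
   the FPSCR fields selected by an 8-bit mask. */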
12914 static rtx
12915 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
12916 {
12917 rtx pat;
12918 tree arg0 = CALL_EXPR_ARG (exp, 0);
12919 tree arg1 = CALL_EXPR_ARG (exp, 1);
12920 rtx op0 = expand_normal (arg0);
12921 rtx op1 = expand_normal (arg1);
12922 machine_mode mode0 = insn_data[icode].operand[0].mode;
12923 machine_mode mode1 = insn_data[icode].operand[1].mode;
12924
12925 if (icode == CODE_FOR_nothing)
12926 /* Builtin not supported on this processor. */
12927 return 0;
12928
12929 /* If we got invalid arguments bail out before generating bad rtl. */
12930 if (arg0 == error_mark_node || arg1 == error_mark_node)
12931 return const0_rtx;
12932
12933 if (!CONST_INT_P (op0)
12934 || INTVAL (op0) > 255
12935 || INTVAL (op0) < 0)
12936 {
12937 error ("argument 1 must be an 8-bit field value");
12938 return const0_rtx;
12939 }
12940
12941 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12942 op0 = copy_to_mode_reg (mode0, op0);
12943
12944 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12945 op1 = copy_to_mode_reg (mode1, op1);
12946
12947 pat = GEN_FCN (icode) (op0, op1);
12948 if (!pat)
12949 return const0_rtx;
12950 emit_insn (pat);
12951
12952 return NULL_RTX;
12953 }
12954
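/* Expand __builtin_mtfsb0 and __builtin_mtfsb1, which clear or set a single
   FPSCR bit; the bit number must be a constant in the range 0-31. */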
12955 static rtx
12956 rs6000_expand_mtfsb_builtin (enum insn_code icode, tree exp)
12957 {
12958 rtx pat;
12959 tree arg0 = CALL_EXPR_ARG (exp, 0);
12960 rtx op0 = expand_normal (arg0);
12961
12962 if (icode == CODE_FOR_nothing)
12963 /* Builtin not supported on this processor. */
12964 return 0;
12965
12966 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
12967 {
12968 error ("%<__builtin_mtfsb0%> and %<__builtin_mtfsb1%> not supported with "
12969 "%<-msoft-float%>");
12970 return const0_rtx;
12971 }
12972
12973 /* If we got invalid arguments bail out before generating bad rtl. */
12974 if (arg0 == error_mark_node)
12975 return const0_rtx;
12976
12977 /* Only allow bit numbers 0 to 31. */
12978 if (!u5bit_cint_operand (op0, VOIDmode))
12979 {
12980 error ("argument must be a constant between 0 and 31");
12981 return const0_rtx;
12982 }
12983
12984 pat = GEN_FCN (icode) (op0);
12985 if (!pat)
12986 return const0_rtx;
12987 emit_insn (pat);
12988
12989 return NULL_RTX;
12990 }
12991
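/* Expand __builtin_set_fpscr_rn, which sets the FPSCR rounding-mode field
   from the least significant two bits of its argument. */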
12992 static rtx
12993 rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp)
12994 {
12995 rtx pat;
12996 tree arg0 = CALL_EXPR_ARG (exp, 0);
12997 rtx op0 = expand_normal (arg0);
12998 machine_mode mode0 = insn_data[icode].operand[0].mode;
12999
13000 if (icode == CODE_FOR_nothing)
13001 /* Builtin not supported on this processor. */
13002 return 0;
13003
13004 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13005 {
13006 error ("%<__builtin_set_fpscr_rn%> not supported with %<-msoft-float%>");
13007 return const0_rtx;
13008 }
13009
13010 /* If we got invalid arguments bail out before generating bad rtl. */
13011 if (arg0 == error_mark_node)
13012 return const0_rtx;
13013
13014 /* If the argument is a constant, check the range. The argument can only be
13015 a 2-bit value. Unfortunately, we cannot check the range of the value at
13016 compile time if the argument is a variable. The least significant two
13017 bits of the argument, regardless of type, are used to set the rounding
13018 mode. All other bits are ignored. */
13019 if (CONST_INT_P (op0) && !const_0_to_3_operand (op0, VOIDmode))
13020 {
13021 error ("argument must be a value between 0 and 3");
13022 return const0_rtx;
13023 }
13024
13025 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13026 op0 = copy_to_mode_reg (mode0, op0);
13027
13028 pat = GEN_FCN (icode) (op0);
13029 if (!pat)
13030 return const0_rtx;
13031 emit_insn (pat);
13032
13033 return NULL_RTX;
13034 }
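
/* Expand __builtin_set_fpscr_drn, which sets the FPSCR decimal rounding-mode
   field from the least significant three bits of its argument; 64-bit
   only. */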
13035 static rtx
13036 rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp)
13037 {
13038 rtx pat;
13039 tree arg0 = CALL_EXPR_ARG (exp, 0);
13040 rtx op0 = expand_normal (arg0);
13041 machine_mode mode0 = insn_data[icode].operand[0].mode;
13042
13043 if (TARGET_32BIT)
13044 /* Builtin not supported in 32-bit mode. */
13045 fatal_error (input_location,
13046 "%<__builtin_set_fpscr_drn%> is not supported "
13047 "in 32-bit mode");
13048
13049 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13050 {
13051 error ("%<__builtin_set_fpscr_drn%> not supported with %<-msoft-float%>");
13052 return const0_rtx;
13053 }
13054
13055 if (icode == CODE_FOR_nothing)
13056 /* Builtin not supported on this processor. */
13057 return 0;
13058
13059 /* If we got invalid arguments bail out before generating bad rtl. */
13060 if (arg0 == error_mark_node)
13061 return const0_rtx;
13062
13063 /* If the argument is a constant, check the range. The argument can only be
13064 a 3-bit value. Unfortunately, we cannot check the range of the value at
13065 compile time if the argument is a variable. The least significant three
13066 bits of the argument, regardless of type, are used to set the decimal
13067 rounding mode. All other bits are ignored. */
13068 if (CONST_INT_P (op0) && !const_0_to_7_operand (op0, VOIDmode))
13069 {
13070 error ("argument must be a value between 0 and 7");
13071 return const0_rtx;
13072 }
13073
13074 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13075 op0 = copy_to_mode_reg (mode0, op0);
13076
13077 pat = GEN_FCN (icode) (op0);
13078 if (! pat)
13079 return const0_rtx;
13080 emit_insn (pat);
13081
13082 return NULL_RTX;
13083 }
13084
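/* Expand a builtin taking one operand: TARGET = ICODE (OP0), checking the
   immediate range where the instruction requires a literal. */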
13085 static rtx
13086 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13087 {
13088 rtx pat;
13089 tree arg0 = CALL_EXPR_ARG (exp, 0);
13090 rtx op0 = expand_normal (arg0);
13091 machine_mode tmode = insn_data[icode].operand[0].mode;
13092 machine_mode mode0 = insn_data[icode].operand[1].mode;
13093
13094 if (icode == CODE_FOR_nothing)
13095 /* Builtin not supported on this processor. */
13096 return 0;
13097
13098 /* If we got invalid arguments bail out before generating bad rtl. */
13099 if (arg0 == error_mark_node)
13100 return const0_rtx;
13101
13102 if (icode == CODE_FOR_altivec_vspltisb
13103 || icode == CODE_FOR_altivec_vspltish
13104 || icode == CODE_FOR_altivec_vspltisw)
13105 {
13106 /* Only allow 5-bit *signed* literals. */
13107 if (!CONST_INT_P (op0)
13108 || INTVAL (op0) > 15
13109 || INTVAL (op0) < -16)
13110 {
13111 error ("argument 1 must be a 5-bit signed literal");
13112 return CONST0_RTX (tmode);
13113 }
13114 }
13115
13116 if (target == 0
13117 || GET_MODE (target) != tmode
13118 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13119 target = gen_reg_rtx (tmode);
13120
13121 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13122 op0 = copy_to_mode_reg (mode0, op0);
13123
13124 pat = GEN_FCN (icode) (target, op0);
13125 if (! pat)
13126 return 0;
13127 emit_insn (pat);
13128
13129 return target;
13130 }
13131
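/* Expand an AltiVec ABS* builtin: TARGET = ICODE (OP0), where the pattern
   also needs two scratch registers of the operand's mode. */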
13132 static rtx
13133 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
13134 {
13135 rtx pat, scratch1, scratch2;
13136 tree arg0 = CALL_EXPR_ARG (exp, 0);
13137 rtx op0 = expand_normal (arg0);
13138 machine_mode tmode = insn_data[icode].operand[0].mode;
13139 machine_mode mode0 = insn_data[icode].operand[1].mode;
13140
13141 /* If we have invalid arguments, bail out before generating bad rtl. */
13142 if (arg0 == error_mark_node)
13143 return const0_rtx;
13144
13145 if (target == 0
13146 || GET_MODE (target) != tmode
13147 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13148 target = gen_reg_rtx (tmode);
13149
13150 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13151 op0 = copy_to_mode_reg (mode0, op0);
13152
13153 scratch1 = gen_reg_rtx (mode0);
13154 scratch2 = gen_reg_rtx (mode0);
13155
13156 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
13157 if (! pat)
13158 return 0;
13159 emit_insn (pat);
13160
13161 return target;
13162 }
13163
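/* Expand a builtin taking two operands: TARGET = ICODE (OP0, OP1), with the
   immediate-range checks below for instructions requiring literals. */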
13164 static rtx
13165 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
13166 {
13167 rtx pat;
13168 tree arg0 = CALL_EXPR_ARG (exp, 0);
13169 tree arg1 = CALL_EXPR_ARG (exp, 1);
13170 rtx op0 = expand_normal (arg0);
13171 rtx op1 = expand_normal (arg1);
13172 machine_mode tmode = insn_data[icode].operand[0].mode;
13173 machine_mode mode0 = insn_data[icode].operand[1].mode;
13174 machine_mode mode1 = insn_data[icode].operand[2].mode;
13175
13176 if (icode == CODE_FOR_nothing)
13177 /* Builtin not supported on this processor. */
13178 return 0;
13179
13180 /* If we got invalid arguments bail out before generating bad rtl. */
13181 if (arg0 == error_mark_node || arg1 == error_mark_node)
13182 return const0_rtx;
13183
13184 if (icode == CODE_FOR_unpackv1ti
13185 || icode == CODE_FOR_unpackkf
13186 || icode == CODE_FOR_unpacktf
13187 || icode == CODE_FOR_unpackif
13188 || icode == CODE_FOR_unpacktd)
13189 {
13190 /* Only allow 1-bit unsigned literals. */
13191 STRIP_NOPS (arg1);
13192 if (TREE_CODE (arg1) != INTEGER_CST
13193 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
13194 {
13195 error ("argument 2 must be a 1-bit unsigned literal");
13196 return CONST0_RTX (tmode);
13197 }
13198 }
13199 else if (icode == CODE_FOR_altivec_vspltw)
13200 {
13201 /* Only allow 2-bit unsigned literals. */
13202 STRIP_NOPS (arg1);
13203 if (TREE_CODE (arg1) != INTEGER_CST
13204 || TREE_INT_CST_LOW (arg1) & ~3)
13205 {
13206 error ("argument 2 must be a 2-bit unsigned literal");
13207 return CONST0_RTX (tmode);
13208 }
13209 }
13210 else if (icode == CODE_FOR_altivec_vsplth)
13211 {
13212 /* Only allow 3-bit unsigned literals. */
13213 STRIP_NOPS (arg1);
13214 if (TREE_CODE (arg1) != INTEGER_CST
13215 || TREE_INT_CST_LOW (arg1) & ~7)
13216 {
13217 error ("argument 2 must be a 3-bit unsigned literal");
13218 return CONST0_RTX (tmode);
13219 }
13220 }
13221 else if (icode == CODE_FOR_altivec_vspltb)
13222 {
13223 /* Only allow 4-bit unsigned literals. */
13224 STRIP_NOPS (arg1);
13225 if (TREE_CODE (arg1) != INTEGER_CST
13226 || TREE_INT_CST_LOW (arg1) & ~15)
13227 {
13228 error ("argument 2 must be a 4-bit unsigned literal");
13229 return CONST0_RTX (tmode);
13230 }
13231 }
13232 else if (icode == CODE_FOR_altivec_vcfux
13233 || icode == CODE_FOR_altivec_vcfsx
13234 || icode == CODE_FOR_altivec_vctsxs
13235 || icode == CODE_FOR_altivec_vctuxs)
13236 {
13237 /* Only allow 5-bit unsigned literals. */
13238 STRIP_NOPS (arg1);
13239 if (TREE_CODE (arg1) != INTEGER_CST
13240 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13241 {
13242 error ("argument 2 must be a 5-bit unsigned literal");
13243 return CONST0_RTX (tmode);
13244 }
13245 }
13246 else if (icode == CODE_FOR_dfptstsfi_eq_dd
13247 || icode == CODE_FOR_dfptstsfi_lt_dd
13248 || icode == CODE_FOR_dfptstsfi_gt_dd
13249 || icode == CODE_FOR_dfptstsfi_unordered_dd
13250 || icode == CODE_FOR_dfptstsfi_eq_td
13251 || icode == CODE_FOR_dfptstsfi_lt_td
13252 || icode == CODE_FOR_dfptstsfi_gt_td
13253 || icode == CODE_FOR_dfptstsfi_unordered_td)
13254 {
13255 /* Only allow 6-bit unsigned literals. */
13256 STRIP_NOPS (arg0);
13257 if (TREE_CODE (arg0) != INTEGER_CST
13258 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
13259 {
13260 error ("argument 1 must be a 6-bit unsigned literal");
13261 return CONST0_RTX (tmode);
13262 }
13263 }
13264 else if (icode == CODE_FOR_xststdcqp_kf
13265 || icode == CODE_FOR_xststdcqp_tf
13266 || icode == CODE_FOR_xststdcdp
13267 || icode == CODE_FOR_xststdcsp
13268 || icode == CODE_FOR_xvtstdcdp
13269 || icode == CODE_FOR_xvtstdcsp)
13270 {
13271 /* Only allow 7-bit unsigned literals. */
13272 STRIP_NOPS (arg1);
13273 if (TREE_CODE (arg1) != INTEGER_CST
13274 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
13275 {
13276 error ("argument 2 must be a 7-bit unsigned literal");
13277 return CONST0_RTX (tmode);
13278 }
13279 }
13280
13281 if (target == 0
13282 || GET_MODE (target) != tmode
13283 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13284 target = gen_reg_rtx (tmode);
13285
13286 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13287 op0 = copy_to_mode_reg (mode0, op0);
13288 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13289 op1 = copy_to_mode_reg (mode1, op1);
13290
13291 pat = GEN_FCN (icode) (target, op0, op1);
13292 if (! pat)
13293 return 0;
13294 emit_insn (pat);
13295
13296 return target;
13297 }
13298
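/* Expand an AltiVec predicate builtin: emit the underlying comparison, then
   extract the requested CR6 test (selected by argument 0 of EXP) into
   TARGET. */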
13299 static rtx
13300 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13301 {
13302 rtx pat, scratch;
13303 tree cr6_form = CALL_EXPR_ARG (exp, 0);
13304 tree arg0 = CALL_EXPR_ARG (exp, 1);
13305 tree arg1 = CALL_EXPR_ARG (exp, 2);
13306 rtx op0 = expand_normal (arg0);
13307 rtx op1 = expand_normal (arg1);
13308 machine_mode tmode = SImode;
13309 machine_mode mode0 = insn_data[icode].operand[1].mode;
13310 machine_mode mode1 = insn_data[icode].operand[2].mode;
13311 int cr6_form_int;
13312
13313 if (TREE_CODE (cr6_form) != INTEGER_CST)
13314 {
13315 error ("argument 1 of %qs must be a constant",
13316 "__builtin_altivec_predicate");
13317 return const0_rtx;
13318 }
13319 else
13320 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
13321
13322 gcc_assert (mode0 == mode1);
13323
13324 /* If we have invalid arguments, bail out before generating bad rtl. */
13325 if (arg0 == error_mark_node || arg1 == error_mark_node)
13326 return const0_rtx;
13327
13328 if (target == 0
13329 || GET_MODE (target) != tmode
13330 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13331 target = gen_reg_rtx (tmode);
13332
13333 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13334 op0 = copy_to_mode_reg (mode0, op0);
13335 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13336 op1 = copy_to_mode_reg (mode1, op1);
13337
13338 /* Note that for many of the relevant operations (e.g. cmpne or
13339 cmpeq) with float or double operands, it makes more sense for the
13340 mode of the allocated scratch register to be a vector of
13341 integers. But the choice to copy the mode of operand 0 was made
13342 long ago and there are no plans to change it. */
13343 scratch = gen_reg_rtx (mode0);
13344
13345 pat = GEN_FCN (icode) (scratch, op0, op1);
13346 if (! pat)
13347 return 0;
13348 emit_insn (pat);
13349
13350 /* The vec_any* and vec_all* predicates use the same opcodes for two
13351 different operations, but the bits in CR6 will be different
13352 depending on what information we want. So we have to play tricks
13353 with CR6 to get the right bits out.
13354
13355 If you think this is disgusting, look at the specs for the
13356 AltiVec predicates. */
13357
13358 switch (cr6_form_int)
13359 {
13360 case 0:
13361 emit_insn (gen_cr6_test_for_zero (target));
13362 break;
13363 case 1:
13364 emit_insn (gen_cr6_test_for_zero_reverse (target));
13365 break;
13366 case 2:
13367 emit_insn (gen_cr6_test_for_lt (target));
13368 break;
13369 case 3:
13370 emit_insn (gen_cr6_test_for_lt_reverse (target));
13371 break;
13372 default:
13373 error ("argument 1 of %qs is out of range",
13374 "__builtin_altivec_predicate");
13375 break;
13376 }
13377
13378 return target;
13379 }
13380
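/* Return a V16QI constant vector, forced into a register, that vperm can use
   to byte-swap each element of a vector of mode MODE. */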
13381 rtx
13382 swap_endian_selector_for_mode (machine_mode mode)
13383 {
13384 unsigned int swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
13385 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
13386 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
13387 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
13388
13389 unsigned int *swaparray, i;
13390 rtx perm[16];
13391
13392 switch (mode)
13393 {
13394 case E_V1TImode:
13395 swaparray = swap1;
13396 break;
13397 case E_V2DFmode:
13398 case E_V2DImode:
13399 swaparray = swap2;
13400 break;
13401 case E_V4SFmode:
13402 case E_V4SImode:
13403 swaparray = swap4;
13404 break;
13405 case E_V8HImode:
13406 swaparray = swap8;
13407 break;
13408 default:
13409 gcc_unreachable ();
13410 }
13411
13412 for (i = 0; i < 16; ++i)
13413 perm[i] = GEN_INT (swaparray[i]);
13414
13415 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
13416 gen_rtvec_v (16, perm)));
13417 }
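
/* For example, for V4SImode the selector built above is the vperm control
   vector {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}: it reverses the four
   bytes within each 32-bit element while leaving the element order alone,
   i.e. it converts each element between big- and little-endian byte
   order.  */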
13418
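/* Expand an AltiVec load builtin (lvx and friends): load TARGET from the
   address formed by the builtin's two arguments, using a BLKmode access
   when BLK is true. */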
13419 static rtx
13420 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
13421 {
13422 rtx pat, addr;
13423 tree arg0 = CALL_EXPR_ARG (exp, 0);
13424 tree arg1 = CALL_EXPR_ARG (exp, 1);
13425 machine_mode tmode = insn_data[icode].operand[0].mode;
13426 machine_mode mode0 = Pmode;
13427 machine_mode mode1 = Pmode;
13428 rtx op0 = expand_normal (arg0);
13429 rtx op1 = expand_normal (arg1);
13430
13431 if (icode == CODE_FOR_nothing)
13432 /* Builtin not supported on this processor. */
13433 return 0;
13434
13435 /* If we got invalid arguments bail out before generating bad rtl. */
13436 if (arg0 == error_mark_node || arg1 == error_mark_node)
13437 return const0_rtx;
13438
13439 if (target == 0
13440 || GET_MODE (target) != tmode
13441 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13442 target = gen_reg_rtx (tmode);
13443
13444 op1 = copy_to_mode_reg (mode1, op1);
13445
13446 /* For LVX, express the RTL accurately by ANDing the address with -16.
13447 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
13448 so the raw address is fine. */
13449 if (icode == CODE_FOR_altivec_lvx_v1ti
13450 || icode == CODE_FOR_altivec_lvx_v2df
13451 || icode == CODE_FOR_altivec_lvx_v2di
13452 || icode == CODE_FOR_altivec_lvx_v4sf
13453 || icode == CODE_FOR_altivec_lvx_v4si
13454 || icode == CODE_FOR_altivec_lvx_v8hi
13455 || icode == CODE_FOR_altivec_lvx_v16qi)
13456 {
13457 rtx rawaddr;
13458 if (op0 == const0_rtx)
13459 rawaddr = op1;
13460 else
13461 {
13462 op0 = copy_to_mode_reg (mode0, op0);
13463 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
13464 }
13465 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13466 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
13467
13468 emit_insn (gen_rtx_SET (target, addr));
13469 }
13470 else
13471 {
13472 if (op0 == const0_rtx)
13473 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
13474 else
13475 {
13476 op0 = copy_to_mode_reg (mode0, op0);
13477 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
13478 gen_rtx_PLUS (Pmode, op1, op0));
13479 }
13480
13481 pat = GEN_FCN (icode) (target, addr);
13482 if (! pat)
13483 return 0;
13484 emit_insn (pat);
13485 }
13486
13487 return target;
13488 }
13489
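/* Expand the stxvl builtin, which stores a vector with an explicit byte
   length taken from its third argument. */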
13490 static rtx
13491 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
13492 {
13493 rtx pat;
13494 tree arg0 = CALL_EXPR_ARG (exp, 0);
13495 tree arg1 = CALL_EXPR_ARG (exp, 1);
13496 tree arg2 = CALL_EXPR_ARG (exp, 2);
13497 rtx op0 = expand_normal (arg0);
13498 rtx op1 = expand_normal (arg1);
13499 rtx op2 = expand_normal (arg2);
13500 machine_mode mode0 = insn_data[icode].operand[0].mode;
13501 machine_mode mode1 = insn_data[icode].operand[1].mode;
13502 machine_mode mode2 = insn_data[icode].operand[2].mode;
13503
13504 if (icode == CODE_FOR_nothing)
13505 /* Builtin not supported on this processor. */
13506 return NULL_RTX;
13507
13508 /* If we got invalid arguments bail out before generating bad rtl. */
13509 if (arg0 == error_mark_node
13510 || arg1 == error_mark_node
13511 || arg2 == error_mark_node)
13512 return NULL_RTX;
13513
13514 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13515 op0 = copy_to_mode_reg (mode0, op0);
13516 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13517 op1 = copy_to_mode_reg (mode1, op1);
13518 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13519 op2 = copy_to_mode_reg (mode2, op2);
13520
13521 pat = GEN_FCN (icode) (op0, op1, op2);
13522 if (pat)
13523 emit_insn (pat);
13524
13525 return NULL_RTX;
13526 }
13527
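/* Expand an AltiVec store builtin: store OP0 to the address formed by the
   second and third arguments; for the stvx variants the address is
   explicitly ANDed with -16. */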
13528 static rtx
13529 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
13530 {
13531 tree arg0 = CALL_EXPR_ARG (exp, 0);
13532 tree arg1 = CALL_EXPR_ARG (exp, 1);
13533 tree arg2 = CALL_EXPR_ARG (exp, 2);
13534 rtx op0 = expand_normal (arg0);
13535 rtx op1 = expand_normal (arg1);
13536 rtx op2 = expand_normal (arg2);
13537 rtx pat, addr, rawaddr;
13538 machine_mode tmode = insn_data[icode].operand[0].mode;
13539 machine_mode smode = insn_data[icode].operand[1].mode;
13540 machine_mode mode1 = Pmode;
13541 machine_mode mode2 = Pmode;
13542
13543 /* If we have invalid arguments, bail out before generating bad rtl. */
13544 if (arg0 == error_mark_node
13545 || arg1 == error_mark_node
13546 || arg2 == error_mark_node)
13547 return const0_rtx;
13548
13549 op2 = copy_to_mode_reg (mode2, op2);
13550
13551 /* For STVX, express the RTL accurately by ANDing the address with -16.
13552 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
13553 so the raw address is fine. */
13554 if (icode == CODE_FOR_altivec_stvx_v2df
13555 || icode == CODE_FOR_altivec_stvx_v2di
13556 || icode == CODE_FOR_altivec_stvx_v4sf
13557 || icode == CODE_FOR_altivec_stvx_v4si
13558 || icode == CODE_FOR_altivec_stvx_v8hi
13559 || icode == CODE_FOR_altivec_stvx_v16qi)
13560 {
13561 if (op1 == const0_rtx)
13562 rawaddr = op2;
13563 else
13564 {
13565 op1 = copy_to_mode_reg (mode1, op1);
13566 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
13567 }
13568
13569 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13570 addr = gen_rtx_MEM (tmode, addr);
13571
13572 op0 = copy_to_mode_reg (tmode, op0);
13573
13574 emit_insn (gen_rtx_SET (addr, op0));
13575 }
13576 else
13577 {
13578 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
13579 op0 = copy_to_mode_reg (smode, op0);
13580
13581 if (op1 == const0_rtx)
13582 addr = gen_rtx_MEM (tmode, op2);
13583 else
13584 {
13585 op1 = copy_to_mode_reg (mode1, op1);
13586 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
13587 }
13588
13589 pat = GEN_FCN (icode) (addr, op0);
13590 if (pat)
13591 emit_insn (pat);
13592 }
13593
13594 return NULL_RTX;
13595 }
13596
13597 /* Return the appropriate SPR number associated with the given builtin. */
13598 static inline HOST_WIDE_INT
13599 htm_spr_num (enum rs6000_builtins code)
13600 {
13601 if (code == HTM_BUILTIN_GET_TFHAR
13602 || code == HTM_BUILTIN_SET_TFHAR)
13603 return TFHAR_SPR;
13604 else if (code == HTM_BUILTIN_GET_TFIAR
13605 || code == HTM_BUILTIN_SET_TFIAR)
13606 return TFIAR_SPR;
13607 else if (code == HTM_BUILTIN_GET_TEXASR
13608 || code == HTM_BUILTIN_SET_TEXASR)
13609 return TEXASR_SPR;
13610 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
13611 || code == HTM_BUILTIN_SET_TEXASRU);
13612 return TEXASRU_SPR;
13613 }
13614
13615 /* Return the correct ICODE value depending on whether we are
13616 setting or reading the HTM SPRs. */
13617 static inline enum insn_code
13618 rs6000_htm_spr_icode (bool nonvoid)
13619 {
13620 if (nonvoid)
13621 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
13622 else
13623 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
13624 }
13625
13626 /* Expand the HTM builtin in EXP and store the result in TARGET.
13627 Store true in *EXPANDEDP if we found a builtin to expand. */
13628 static rtx
13629 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
13630 {
13631 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13632 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
13633 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13634 const struct builtin_description *d;
13635 size_t i;
13636
13637 *expandedp = true;
13638
13639 if (!TARGET_POWERPC64
13640 && (fcode == HTM_BUILTIN_TABORTDC
13641 || fcode == HTM_BUILTIN_TABORTDCI))
13642 {
13643 size_t uns_fcode = (size_t)fcode;
13644 const char *name = rs6000_builtin_info[uns_fcode].name;
13645 error ("builtin %qs is only valid in 64-bit mode", name);
13646 return const0_rtx;
13647 }
13648
13649 /* Expand the HTM builtins. */
13650 d = bdesc_htm;
13651 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
13652 if (d->code == fcode)
13653 {
13654 rtx op[MAX_HTM_OPERANDS], pat;
13655 int nopnds = 0;
13656 tree arg;
13657 call_expr_arg_iterator iter;
13658 unsigned attr = rs6000_builtin_info[fcode].attr;
13659 enum insn_code icode = d->icode;
13660 const struct insn_operand_data *insn_op;
13661 bool uses_spr = (attr & RS6000_BTC_SPR);
13662 rtx cr = NULL_RTX;
13663
13664 if (uses_spr)
13665 icode = rs6000_htm_spr_icode (nonvoid);
13666 insn_op = &insn_data[icode].operand[0];
13667
13668 if (nonvoid)
13669 {
13670 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
13671 if (!target
13672 || GET_MODE (target) != tmode
13673 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
13674 target = gen_reg_rtx (tmode);
13675 if (uses_spr)
13676 op[nopnds++] = target;
13677 }
13678
13679 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
13680 {
13681 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
13682 return const0_rtx;
13683
13684 insn_op = &insn_data[icode].operand[nopnds];
13685
13686 op[nopnds] = expand_normal (arg);
13687
13688 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
13689 {
13690 if (!strcmp (insn_op->constraint, "n"))
13691 {
13692 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
13693 if (!CONST_INT_P (op[nopnds]))
13694 error ("argument %d must be an unsigned literal", arg_num);
13695 else
13696 error ("argument %d is an unsigned literal that is "
13697 "out of range", arg_num);
13698 return const0_rtx;
13699 }
13700 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
13701 }
13702
13703 nopnds++;
13704 }
13705
13706 /* Handle the builtins for extended mnemonics. These accept
13707 no arguments, but map to builtins that take arguments. */
13708 switch (fcode)
13709 {
13710 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
13711 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
13712 op[nopnds++] = GEN_INT (1);
13713 if (flag_checking)
13714 attr |= RS6000_BTC_UNARY;
13715 break;
13716 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
13717 op[nopnds++] = GEN_INT (0);
13718 if (flag_checking)
13719 attr |= RS6000_BTC_UNARY;
13720 break;
13721 default:
13722 break;
13723 }
13724
13725 /* If this builtin accesses SPRs, then pass in the appropriate
13726 SPR number and SPR regno as the last two operands. */
13727 if (uses_spr)
13728 {
13729 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
13730 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
13731 }
13732 /* If this builtin accesses a CR, then pass in a scratch
13733 CR as the last operand. */
13734 else if (attr & RS6000_BTC_CR)
13735 {
cr = gen_reg_rtx (CCmode);
13736 op[nopnds++] = cr;
13737 }
13738
13739 if (flag_checking)
13740 {
13741 int expected_nopnds = 0;
13742 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
13743 expected_nopnds = 1;
13744 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
13745 expected_nopnds = 2;
13746 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
13747 expected_nopnds = 3;
13748 if (!(attr & RS6000_BTC_VOID))
13749 expected_nopnds += 1;
13750 if (uses_spr)
13751 expected_nopnds += 1;
13752
13753 gcc_assert (nopnds == expected_nopnds
13754 && nopnds <= MAX_HTM_OPERANDS);
13755 }
13756
13757 switch (nopnds)
13758 {
13759 case 1:
13760 pat = GEN_FCN (icode) (op[0]);
13761 break;
13762 case 2:
13763 pat = GEN_FCN (icode) (op[0], op[1]);
13764 break;
13765 case 3:
13766 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
13767 break;
13768 case 4:
13769 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
13770 break;
13771 default:
13772 gcc_unreachable ();
13773 }
13774 if (!pat)
13775 return NULL_RTX;
13776 emit_insn (pat);
13777
13778 if (attr & RS6000_BTC_CR)
13779 {
13780 if (fcode == HTM_BUILTIN_TBEGIN)
13781 {
13782 /* Emit code to set TARGET to true or false depending on
13783 whether the tbegin. instruction succeeded or failed
13784 to start a transaction. We do this by placing the 1's
13785 complement of CR's EQ bit into TARGET. */
13786 rtx scratch = gen_reg_rtx (SImode);
13787 emit_insn (gen_rtx_SET (scratch,
13788 gen_rtx_EQ (SImode, cr,
13789 const0_rtx)));
13790 emit_insn (gen_rtx_SET (target,
13791 gen_rtx_XOR (SImode, scratch,
13792 GEN_INT (1))));
13793 }
13794 else
13795 {
13796 /* Emit code to copy the 4-bit condition register field
13797 CR into the least significant end of register TARGET. */
13798 rtx scratch1 = gen_reg_rtx (SImode);
13799 rtx scratch2 = gen_reg_rtx (SImode);
13800 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
13801 emit_insn (gen_movcc (subreg, cr));
13802 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
13803 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
13804 }
13805 }
13806
13807 if (nonvoid)
13808 return target;
13809 return const0_rtx;
13810 }
13811
13812 *expandedp = false;
13813 return NULL_RTX;
13814 }
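
/* For example, the argument-less extended mnemonic __builtin_tendall ()
   is handled exactly like __builtin_tend (1): the constant 1 is appended
   as an operand above and the same underlying pattern is emitted.  */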
13815
13816 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
13817
13818 static rtx
13819 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
13820 rtx target)
13821 {
13822 /* __builtin_cpu_init () is a nop, so expand to nothing. */
13823 if (fcode == RS6000_BUILTIN_CPU_INIT)
13824 return const0_rtx;
13825
13826 if (target == 0 || GET_MODE (target) != SImode)
13827 target = gen_reg_rtx (SImode);
13828
13829 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
13830 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
13831 /* Target clones create an ARRAY_REF instead of a STRING_CST; convert it
13832 back to a STRING_CST. */
13833 if (TREE_CODE (arg) == ARRAY_REF
13834 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
13835 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
13836 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
13837 arg = TREE_OPERAND (arg, 0);
13838
13839 if (TREE_CODE (arg) != STRING_CST)
13840 {
13841 error ("builtin %qs only accepts a string argument",
13842 rs6000_builtin_info[(size_t) fcode].name);
13843 return const0_rtx;
13844 }
13845
13846 if (fcode == RS6000_BUILTIN_CPU_IS)
13847 {
13848 const char *cpu = TREE_STRING_POINTER (arg);
13849 rtx cpuid = NULL_RTX;
13850 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
13851 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
13852 {
13853 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
13854 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
13855 break;
13856 }
13857 if (cpuid == NULL_RTX)
13858 {
13859 /* Invalid CPU argument. */
13860 error ("cpu %qs is an invalid argument to builtin %qs",
13861 cpu, rs6000_builtin_info[(size_t) fcode].name);
13862 return const0_rtx;
13863 }
13864
13865 rtx platform = gen_reg_rtx (SImode);
13866 rtx tcbmem = gen_const_mem (SImode,
13867 gen_rtx_PLUS (Pmode,
13868 gen_rtx_REG (Pmode, TLS_REGNUM),
13869 GEN_INT (TCB_PLATFORM_OFFSET)));
13870 emit_move_insn (platform, tcbmem);
13871 emit_insn (gen_eqsi3 (target, platform, cpuid));
13872 }
13873 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
13874 {
13875 const char *hwcap = TREE_STRING_POINTER (arg);
13876 rtx mask = NULL_RTX;
13877 int hwcap_offset;
13878 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
13879 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
13880 {
13881 mask = GEN_INT (cpu_supports_info[i].mask);
13882 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
13883 break;
13884 }
13885 if (mask == NULL_RTX)
13886 {
13887 /* Invalid HWCAP argument. */
13888 error ("%s %qs is an invalid argument to builtin %qs",
13889 "hwcap", hwcap, rs6000_builtin_info[(size_t) fcode].name);
13890 return const0_rtx;
13891 }
13892
13893 rtx tcb_hwcap = gen_reg_rtx (SImode);
13894 rtx tcbmem = gen_const_mem (SImode,
13895 gen_rtx_PLUS (Pmode,
13896 gen_rtx_REG (Pmode, TLS_REGNUM),
13897 GEN_INT (hwcap_offset)));
13898 emit_move_insn (tcb_hwcap, tcbmem);
13899 rtx scratch1 = gen_reg_rtx (SImode);
13900 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
13901 rtx scratch2 = gen_reg_rtx (SImode);
13902 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
13903 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
13904 }
13905 else
13906 gcc_unreachable ();
13907
13908 /* Record that we have expanded a CPU builtin, so that we can later
13909 emit a reference to the special symbol exported by LIBC to ensure we
13910 do not link against an old LIBC that doesn't support this feature. */
13911 cpu_builtin_p = true;
13912
13913 #else
13914 warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware "
13915 "capability bits", rs6000_builtin_info[(size_t) fcode].name);
13916
13917 /* For old LIBCs, always return FALSE. */
13918 emit_move_insn (target, GEN_INT (0));
13919 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
13920
13921 return target;
13922 }
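
/* Illustrative sketch, not part of the expansion above: assuming glibc's
   TCB layout, user code such as

     if (__builtin_cpu_supports ("vsx"))
       use_vsx_path ();   // use_vsx_path is a hypothetical user function

   is expanded to roughly

     hwcap = *(unsigned int *) (r13 + hwcap_offset);  // cached AT_HWCAP word
     tmp   = hwcap & mask;     // mask taken from cpu_supports_info[]
     res   = (tmp == 0) ^ 1;   // nonzero iff the capability bit is set

   where r13 is the TLS register (TLS_REGNUM) and hwcap_offset stands for
   the appropriate TCB_HWCAP_OFFSET value.  __builtin_cpu_is is analogous:
   it loads the platform word at TCB_PLATFORM_OFFSET and compares it with
   the cpuid from cpu_is_info[], biased by _DL_FIRST_PLATFORM.  */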
13923
13924 static rtx
13925 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
13926 {
13927 rtx pat;
13928 tree arg0 = CALL_EXPR_ARG (exp, 0);
13929 tree arg1 = CALL_EXPR_ARG (exp, 1);
13930 tree arg2 = CALL_EXPR_ARG (exp, 2);
13931 rtx op0 = expand_normal (arg0);
13932 rtx op1 = expand_normal (arg1);
13933 rtx op2 = expand_normal (arg2);
  if (icode == CODE_FOR_nothing)
    /* Builtin not supported on this processor.  */
    return 0;

  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;
  machine_mode mode2 = insn_data[icode].operand[3].mode;
13942
  /* If we got invalid arguments, bail out before generating bad rtl.  */
13944 if (arg0 == error_mark_node
13945 || arg1 == error_mark_node
13946 || arg2 == error_mark_node)
13947 return const0_rtx;
13948
  /* Check and prepare the argument depending on the instruction code.

     Note that a switch statement instead of the sequence of tests
     would be incorrect, as many of the CODE_FOR values could be
     CODE_FOR_nothing, and that would yield multiple case labels with
     identical values.  (At runtime we would never reach here in that
     situation anyway.)  */
13956 if (icode == CODE_FOR_altivec_vsldoi_v4sf
13957 || icode == CODE_FOR_altivec_vsldoi_v2df
13958 || icode == CODE_FOR_altivec_vsldoi_v4si
13959 || icode == CODE_FOR_altivec_vsldoi_v8hi
13960 || icode == CODE_FOR_altivec_vsldoi_v16qi)
13961 {
13962 /* Only allow 4-bit unsigned literals. */
13963 STRIP_NOPS (arg2);
13964 if (TREE_CODE (arg2) != INTEGER_CST
13965 || TREE_INT_CST_LOW (arg2) & ~0xf)
13966 {
13967 error ("argument 3 must be a 4-bit unsigned literal");
13968 return CONST0_RTX (tmode);
13969 }
13970 }
13971 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
13972 || icode == CODE_FOR_vsx_xxpermdi_v2di
13973 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
13974 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
13975 || icode == CODE_FOR_vsx_xxpermdi_v1ti
13976 || icode == CODE_FOR_vsx_xxpermdi_v4sf
13977 || icode == CODE_FOR_vsx_xxpermdi_v4si
13978 || icode == CODE_FOR_vsx_xxpermdi_v8hi
13979 || icode == CODE_FOR_vsx_xxpermdi_v16qi
13980 || icode == CODE_FOR_vsx_xxsldwi_v16qi
13981 || icode == CODE_FOR_vsx_xxsldwi_v8hi
13982 || icode == CODE_FOR_vsx_xxsldwi_v4si
13983 || icode == CODE_FOR_vsx_xxsldwi_v4sf
13984 || icode == CODE_FOR_vsx_xxsldwi_v2di
13985 || icode == CODE_FOR_vsx_xxsldwi_v2df)
13986 {
13987 /* Only allow 2-bit unsigned literals. */
13988 STRIP_NOPS (arg2);
13989 if (TREE_CODE (arg2) != INTEGER_CST
13990 || TREE_INT_CST_LOW (arg2) & ~0x3)
13991 {
13992 error ("argument 3 must be a 2-bit unsigned literal");
13993 return CONST0_RTX (tmode);
13994 }
13995 }
13996 else if (icode == CODE_FOR_vsx_set_v2df
13997 || icode == CODE_FOR_vsx_set_v2di
13998 || icode == CODE_FOR_bcdadd
13999 || icode == CODE_FOR_bcdadd_lt
14000 || icode == CODE_FOR_bcdadd_eq
14001 || icode == CODE_FOR_bcdadd_gt
14002 || icode == CODE_FOR_bcdsub
14003 || icode == CODE_FOR_bcdsub_lt
14004 || icode == CODE_FOR_bcdsub_eq
14005 || icode == CODE_FOR_bcdsub_gt)
14006 {
14007 /* Only allow 1-bit unsigned literals. */
14008 STRIP_NOPS (arg2);
14009 if (TREE_CODE (arg2) != INTEGER_CST
14010 || TREE_INT_CST_LOW (arg2) & ~0x1)
14011 {
14012 error ("argument 3 must be a 1-bit unsigned literal");
14013 return CONST0_RTX (tmode);
14014 }
14015 }
14016 else if (icode == CODE_FOR_dfp_ddedpd_dd
14017 || icode == CODE_FOR_dfp_ddedpd_td)
14018 {
14019 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
14020 STRIP_NOPS (arg0);
      if (TREE_CODE (arg0) != INTEGER_CST
          || TREE_INT_CST_LOW (arg0) & ~0x3)
14023 {
14024 error ("argument 1 must be 0 or 2");
14025 return CONST0_RTX (tmode);
14026 }
14027 }
14028 else if (icode == CODE_FOR_dfp_denbcd_dd
14029 || icode == CODE_FOR_dfp_denbcd_td)
14030 {
14031 /* Only allow 1-bit unsigned literals. */
14032 STRIP_NOPS (arg0);
14033 if (TREE_CODE (arg0) != INTEGER_CST
14034 || TREE_INT_CST_LOW (arg0) & ~0x1)
14035 {
14036 error ("argument 1 must be a 1-bit unsigned literal");
14037 return CONST0_RTX (tmode);
14038 }
14039 }
14040 else if (icode == CODE_FOR_dfp_dscli_dd
14041 || icode == CODE_FOR_dfp_dscli_td
14042 || icode == CODE_FOR_dfp_dscri_dd
14043 || icode == CODE_FOR_dfp_dscri_td)
14044 {
14045 /* Only allow 6-bit unsigned literals. */
14046 STRIP_NOPS (arg1);
14047 if (TREE_CODE (arg1) != INTEGER_CST
14048 || TREE_INT_CST_LOW (arg1) & ~0x3f)
14049 {
14050 error ("argument 2 must be a 6-bit unsigned literal");
14051 return CONST0_RTX (tmode);
14052 }
14053 }
14054 else if (icode == CODE_FOR_crypto_vshasigmaw
14055 || icode == CODE_FOR_crypto_vshasigmad)
14056 {
      /* Check whether the 2nd and 3rd arguments are integer constants in
         the valid range, and prepare the arguments.  */
14059 STRIP_NOPS (arg1);
14060 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2))
14061 {
14062 error ("argument 2 must be 0 or 1");
14063 return CONST0_RTX (tmode);
14064 }
14065
14066 STRIP_NOPS (arg2);
14067 if (TREE_CODE (arg2) != INTEGER_CST
14068 || wi::geu_p (wi::to_wide (arg2), 16))
14069 {
14070 error ("argument 3 must be in the range [0, 15]");
14071 return CONST0_RTX (tmode);
14072 }
14073 }
14074
14075 if (target == 0
14076 || GET_MODE (target) != tmode
14077 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14078 target = gen_reg_rtx (tmode);
14079
14080 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14081 op0 = copy_to_mode_reg (mode0, op0);
14082 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14083 op1 = copy_to_mode_reg (mode1, op1);
14084 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14085 op2 = copy_to_mode_reg (mode2, op2);
14086
14087 pat = GEN_FCN (icode) (target, op0, op1, op2);
14088 if (! pat)
14089 return 0;
14090 emit_insn (pat);
14091
14092 return target;
14093 }
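
/* A note on the literal checks above: each is an instance of the same mask
   test.  For an N-bit unsigned field,

     TREE_INT_CST_LOW (arg) & ~((1 << N) - 1)

   is nonzero exactly when a bit outside the field is set, so e.g. the
   4-bit vsldoi check masks with ~0xf and accepts only values 0 through
   15.  */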
14094
14095
14096 /* Expand the dst builtins. */
14097 static rtx
14098 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
14099 bool *expandedp)
14100 {
14101 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14102 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14103 tree arg0, arg1, arg2;
14104 machine_mode mode0, mode1;
14105 rtx pat, op0, op1, op2;
14106 const struct builtin_description *d;
14107 size_t i;
14108
14109 *expandedp = false;
14110
14111 /* Handle DST variants. */
14112 d = bdesc_dst;
14113 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14114 if (d->code == fcode)
14115 {
14116 arg0 = CALL_EXPR_ARG (exp, 0);
14117 arg1 = CALL_EXPR_ARG (exp, 1);
14118 arg2 = CALL_EXPR_ARG (exp, 2);
14119 op0 = expand_normal (arg0);
14120 op1 = expand_normal (arg1);
14121 op2 = expand_normal (arg2);
14122 mode0 = insn_data[d->icode].operand[0].mode;
14123 mode1 = insn_data[d->icode].operand[1].mode;
14124
        /* Invalid arguments; bail out before generating bad rtl.  */
14126 if (arg0 == error_mark_node
14127 || arg1 == error_mark_node
14128 || arg2 == error_mark_node)
14129 return const0_rtx;
14130
14131 *expandedp = true;
14132 STRIP_NOPS (arg2);
14133 if (TREE_CODE (arg2) != INTEGER_CST
14134 || TREE_INT_CST_LOW (arg2) & ~0x3)
14135 {
14136 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
14137 return const0_rtx;
14138 }
14139
14140 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14141 op0 = copy_to_mode_reg (Pmode, op0);
14142 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14143 op1 = copy_to_mode_reg (mode1, op1);
14144
14145 pat = GEN_FCN (d->icode) (op0, op1, op2);
14146 if (pat != 0)
14147 emit_insn (pat);
14148
14149 return NULL_RTX;
14150 }
14151
14152 return NULL_RTX;
14153 }
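
/* For reference, the data-stream touch built-ins handled above appear in
   source as, for instance,

     vec_dst (ptr, ctl, 0);   // ptr and ctl are user-supplied values

   where the final argument selects one of four stream tags, which is why
   the INTEGER_CST check above insists on a 2-bit literal.  */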
14154
14155 /* Expand vec_init builtin. */
14156 static rtx
14157 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
14158 {
14159 machine_mode tmode = TYPE_MODE (type);
14160 machine_mode inner_mode = GET_MODE_INNER (tmode);
14161 int i, n_elt = GET_MODE_NUNITS (tmode);
14162
14163 gcc_assert (VECTOR_MODE_P (tmode));
14164 gcc_assert (n_elt == call_expr_nargs (exp));
14165
14166 if (!target || !register_operand (target, tmode))
14167 target = gen_reg_rtx (tmode);
14168
  /* If we have a vector composed of a single element, such as V1TImode, do
     the initialization directly.  */
14171 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
14172 {
14173 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
14174 emit_move_insn (target, gen_lowpart (tmode, x));
14175 }
14176 else
14177 {
14178 rtvec v = rtvec_alloc (n_elt);
14179
14180 for (i = 0; i < n_elt; ++i)
14181 {
14182 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
14183 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14184 }
14185
14186 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
14187 }
14188
14189 return target;
14190 }
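
/* Sketch of what reaches this expander: a vec_init call with one argument
   per element, e.g. four arguments for a V4SImode result.  The arguments
   are collected into an rtvec and handed to rs6000_expand_vector_init as
   a PARALLEL, while the single-element V1TImode case above is just a
   lowpart move.  */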
14191
14192 /* Return the integer constant in ARG. Constrain it to be in the range
14193 of the subparts of VEC_TYPE; issue an error if not. */
14194
14195 static int
14196 get_element_number (tree vec_type, tree arg)
14197 {
14198 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14199
14200 if (!tree_fits_uhwi_p (arg)
14201 || (elt = tree_to_uhwi (arg), elt > max))
14202 {
14203 error ("selector must be an integer constant in the range [0, %wi]", max);
14204 return 0;
14205 }
14206
14207 return elt;
14208 }
14209
14210 /* Expand vec_set builtin. */
14211 static rtx
14212 altivec_expand_vec_set_builtin (tree exp)
14213 {
14214 machine_mode tmode, mode1;
14215 tree arg0, arg1, arg2;
14216 int elt;
14217 rtx op0, op1;
14218
14219 arg0 = CALL_EXPR_ARG (exp, 0);
14220 arg1 = CALL_EXPR_ARG (exp, 1);
14221 arg2 = CALL_EXPR_ARG (exp, 2);
14222
14223 tmode = TYPE_MODE (TREE_TYPE (arg0));
14224 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14225 gcc_assert (VECTOR_MODE_P (tmode));
14226
14227 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
14228 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
14229 elt = get_element_number (TREE_TYPE (arg0), arg2);
14230
14231 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14232 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14233
14234 op0 = force_reg (tmode, op0);
14235 op1 = force_reg (mode1, op1);
14236
14237 rs6000_expand_vector_set (op0, op1, elt);
14238
14239 return op0;
14240 }
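
/* Sketch: these built-ins back element insertion such as

     v = vec_insert (x, v, 2);   // AltiVec intrinsic form

   which stores X into element 2 of V; get_element_number has already
   verified that the selector is a constant in [0, nunits-1].  */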
14241
14242 /* Expand vec_ext builtin. */
14243 static rtx
14244 altivec_expand_vec_ext_builtin (tree exp, rtx target)
14245 {
14246 machine_mode tmode, mode0;
14247 tree arg0, arg1;
14248 rtx op0;
14249 rtx op1;
14250
14251 arg0 = CALL_EXPR_ARG (exp, 0);
14252 arg1 = CALL_EXPR_ARG (exp, 1);
14253
14254 op0 = expand_normal (arg0);
14255 op1 = expand_normal (arg1);
14256
14257 if (TREE_CODE (arg1) == INTEGER_CST)
14258 {
14259 unsigned HOST_WIDE_INT elt;
14260 unsigned HOST_WIDE_INT size = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
14261 unsigned int truncated_selector;
      /* Even if !tree_fits_uhwi_p (arg1), TREE_INT_CST_LOW (arg1) returns
         the low-order bits of the INTEGER_CST, giving modulo indexing.  */
14264 elt = TREE_INT_CST_LOW (arg1);
14265 truncated_selector = elt % size;
14266 op1 = GEN_INT (truncated_selector);
14267 }
14268
14269 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14270 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14271 gcc_assert (VECTOR_MODE_P (mode0));
14272
14273 op0 = force_reg (mode0, op0);
14274
14275 if (optimize || !target || !register_operand (target, tmode))
14276 target = gen_reg_rtx (tmode);
14277
14278 rs6000_expand_vector_extract (target, op0, op1);
14279
14280 return target;
14281 }
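
/* Note the modulo semantics for constant selectors above: for a V4SImode
   extract, a selector of 5 is reduced to 5 % 4 == 1 and element 1 is
   returned, rather than diagnosing the out-of-range literal.  */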
14282
14283 /* Expand the builtin in EXP and store the result in TARGET. Store
14284 true in *EXPANDEDP if we found a builtin to expand. */
14285 static rtx
14286 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
14287 {
14288 const struct builtin_description *d;
14289 size_t i;
14290 enum insn_code icode;
14291 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14292 tree arg0, arg1, arg2;
14293 rtx op0, pat;
14294 machine_mode tmode, mode0;
14295 enum rs6000_builtins fcode
14296 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14297
14298 if (rs6000_overloaded_builtin_p (fcode))
14299 {
14300 *expandedp = true;
14301 error ("unresolved overload for Altivec builtin %qF", fndecl);
14302
14303 /* Given it is invalid, just generate a normal call. */
14304 return expand_call (exp, target, false);
14305 }
14306
14307 target = altivec_expand_dst_builtin (exp, target, expandedp);
14308 if (*expandedp)
14309 return target;
14310
14311 *expandedp = true;
14312
14313 switch (fcode)
14314 {
14315 case ALTIVEC_BUILTIN_STVX_V2DF:
14316 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
14317 case ALTIVEC_BUILTIN_STVX_V2DI:
14318 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
14319 case ALTIVEC_BUILTIN_STVX_V4SF:
14320 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
14321 case ALTIVEC_BUILTIN_STVX:
14322 case ALTIVEC_BUILTIN_STVX_V4SI:
14323 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
14324 case ALTIVEC_BUILTIN_STVX_V8HI:
14325 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
14326 case ALTIVEC_BUILTIN_STVX_V16QI:
14327 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
14328 case ALTIVEC_BUILTIN_STVEBX:
14329 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
14330 case ALTIVEC_BUILTIN_STVEHX:
14331 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
14332 case ALTIVEC_BUILTIN_STVEWX:
14333 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
14334 case ALTIVEC_BUILTIN_STVXL_V2DF:
14335 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
14336 case ALTIVEC_BUILTIN_STVXL_V2DI:
14337 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
14338 case ALTIVEC_BUILTIN_STVXL_V4SF:
14339 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
14340 case ALTIVEC_BUILTIN_STVXL:
14341 case ALTIVEC_BUILTIN_STVXL_V4SI:
14342 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
14343 case ALTIVEC_BUILTIN_STVXL_V8HI:
14344 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
14345 case ALTIVEC_BUILTIN_STVXL_V16QI:
14346 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
14347
14348 case ALTIVEC_BUILTIN_STVLX:
14349 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
14350 case ALTIVEC_BUILTIN_STVLXL:
14351 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
14352 case ALTIVEC_BUILTIN_STVRX:
14353 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
14354 case ALTIVEC_BUILTIN_STVRXL:
14355 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
14356
14357 case P9V_BUILTIN_STXVL:
14358 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
14359
14360 case P9V_BUILTIN_XST_LEN_R:
14361 return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp);
14362
14363 case VSX_BUILTIN_STXVD2X_V1TI:
14364 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
14365 case VSX_BUILTIN_STXVD2X_V2DF:
14366 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
14367 case VSX_BUILTIN_STXVD2X_V2DI:
14368 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
14369 case VSX_BUILTIN_STXVW4X_V4SF:
14370 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
14371 case VSX_BUILTIN_STXVW4X_V4SI:
14372 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
14373 case VSX_BUILTIN_STXVW4X_V8HI:
14374 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
14375 case VSX_BUILTIN_STXVW4X_V16QI:
14376 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
14377
14378 /* For the following on big endian, it's ok to use any appropriate
14379 unaligned-supporting store, so use a generic expander. For
14380 little-endian, the exact element-reversing instruction must
14381 be used. */
14382 case VSX_BUILTIN_ST_ELEMREV_V1TI:
14383 {
14384 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
14385 : CODE_FOR_vsx_st_elemrev_v1ti);
14386 return altivec_expand_stv_builtin (code, exp);
14387 }
14388 case VSX_BUILTIN_ST_ELEMREV_V2DF:
14389 {
14390 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
14391 : CODE_FOR_vsx_st_elemrev_v2df);
14392 return altivec_expand_stv_builtin (code, exp);
14393 }
14394 case VSX_BUILTIN_ST_ELEMREV_V2DI:
14395 {
14396 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
14397 : CODE_FOR_vsx_st_elemrev_v2di);
14398 return altivec_expand_stv_builtin (code, exp);
14399 }
14400 case VSX_BUILTIN_ST_ELEMREV_V4SF:
14401 {
14402 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
14403 : CODE_FOR_vsx_st_elemrev_v4sf);
14404 return altivec_expand_stv_builtin (code, exp);
14405 }
14406 case VSX_BUILTIN_ST_ELEMREV_V4SI:
14407 {
14408 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
14409 : CODE_FOR_vsx_st_elemrev_v4si);
14410 return altivec_expand_stv_builtin (code, exp);
14411 }
14412 case VSX_BUILTIN_ST_ELEMREV_V8HI:
14413 {
14414 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
14415 : CODE_FOR_vsx_st_elemrev_v8hi);
14416 return altivec_expand_stv_builtin (code, exp);
14417 }
14418 case VSX_BUILTIN_ST_ELEMREV_V16QI:
14419 {
14420 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
14421 : CODE_FOR_vsx_st_elemrev_v16qi);
14422 return altivec_expand_stv_builtin (code, exp);
14423 }
14424
14425 case ALTIVEC_BUILTIN_MFVSCR:
14426 icode = CODE_FOR_altivec_mfvscr;
14427 tmode = insn_data[icode].operand[0].mode;
14428
14429 if (target == 0
14430 || GET_MODE (target) != tmode
14431 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14432 target = gen_reg_rtx (tmode);
14433
14434 pat = GEN_FCN (icode) (target);
14435 if (! pat)
14436 return 0;
14437 emit_insn (pat);
14438 return target;
14439
14440 case ALTIVEC_BUILTIN_MTVSCR:
14441 icode = CODE_FOR_altivec_mtvscr;
14442 arg0 = CALL_EXPR_ARG (exp, 0);
14443 op0 = expand_normal (arg0);
14444 mode0 = insn_data[icode].operand[0].mode;
14445
      /* If we got invalid arguments, bail out before generating bad rtl.  */
14447 if (arg0 == error_mark_node)
14448 return const0_rtx;
14449
14450 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14451 op0 = copy_to_mode_reg (mode0, op0);
14452
14453 pat = GEN_FCN (icode) (op0);
14454 if (pat)
14455 emit_insn (pat);
14456 return NULL_RTX;
14457
14458 case ALTIVEC_BUILTIN_DSSALL:
14459 emit_insn (gen_altivec_dssall ());
14460 return NULL_RTX;
14461
14462 case ALTIVEC_BUILTIN_DSS:
14463 icode = CODE_FOR_altivec_dss;
14464 arg0 = CALL_EXPR_ARG (exp, 0);
14465 STRIP_NOPS (arg0);
14466 op0 = expand_normal (arg0);
14467 mode0 = insn_data[icode].operand[0].mode;
14468
      /* If we got invalid arguments, bail out before generating bad rtl.  */
14470 if (arg0 == error_mark_node)
14471 return const0_rtx;
14472
14473 if (TREE_CODE (arg0) != INTEGER_CST
14474 || TREE_INT_CST_LOW (arg0) & ~0x3)
14475 {
14476 error ("argument to %qs must be a 2-bit unsigned literal", "dss");
14477 return const0_rtx;
14478 }
14479
14480 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14481 op0 = copy_to_mode_reg (mode0, op0);
14482
14483 emit_insn (gen_altivec_dss (op0));
14484 return NULL_RTX;
14485
14486 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
14487 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
14488 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
14489 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
14490 case VSX_BUILTIN_VEC_INIT_V2DF:
14491 case VSX_BUILTIN_VEC_INIT_V2DI:
14492 case VSX_BUILTIN_VEC_INIT_V1TI:
14493 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
14494
14495 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
14496 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
14497 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
14498 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
14499 case VSX_BUILTIN_VEC_SET_V2DF:
14500 case VSX_BUILTIN_VEC_SET_V2DI:
14501 case VSX_BUILTIN_VEC_SET_V1TI:
14502 return altivec_expand_vec_set_builtin (exp);
14503
14504 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
14505 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
14506 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
14507 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
14508 case VSX_BUILTIN_VEC_EXT_V2DF:
14509 case VSX_BUILTIN_VEC_EXT_V2DI:
14510 case VSX_BUILTIN_VEC_EXT_V1TI:
14511 return altivec_expand_vec_ext_builtin (exp, target);
14512
14513 case P9V_BUILTIN_VEC_EXTRACT4B:
14514 arg1 = CALL_EXPR_ARG (exp, 1);
14515 STRIP_NOPS (arg1);
14516
14517 /* Generate a normal call if it is invalid. */
14518 if (arg1 == error_mark_node)
14519 return expand_call (exp, target, false);
14520
14521 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
14522 {
14523 error ("second argument to %qs must be [0, 12]", "vec_vextract4b");
14524 return expand_call (exp, target, false);
14525 }
14526 break;
14527
14528 case P9V_BUILTIN_VEC_INSERT4B:
14529 arg2 = CALL_EXPR_ARG (exp, 2);
14530 STRIP_NOPS (arg2);
14531
14532 /* Generate a normal call if it is invalid. */
14533 if (arg2 == error_mark_node)
14534 return expand_call (exp, target, false);
14535
14536 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
14537 {
14538 error ("third argument to %qs must be [0, 12]", "vec_vinsert4b");
14539 return expand_call (exp, target, false);
14540 }
14541 break;
14542
    default:
      /* Fall through to the table-driven handling below.  */
      break;
14546 }
14547
14548 /* Expand abs* operations. */
14549 d = bdesc_abs;
14550 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14551 if (d->code == fcode)
14552 return altivec_expand_abs_builtin (d->icode, exp, target);
14553
14554 /* Expand the AltiVec predicates. */
14555 d = bdesc_altivec_preds;
14556 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14557 if (d->code == fcode)
14558 return altivec_expand_predicate_builtin (d->icode, exp, target);
14559
14560 /* LV* are funky. We initialized them differently. */
14561 switch (fcode)
14562 {
14563 case ALTIVEC_BUILTIN_LVSL:
14564 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
14565 exp, target, false);
14566 case ALTIVEC_BUILTIN_LVSR:
14567 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
14568 exp, target, false);
14569 case ALTIVEC_BUILTIN_LVEBX:
14570 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
14571 exp, target, false);
14572 case ALTIVEC_BUILTIN_LVEHX:
14573 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
14574 exp, target, false);
14575 case ALTIVEC_BUILTIN_LVEWX:
14576 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
14577 exp, target, false);
14578 case ALTIVEC_BUILTIN_LVXL_V2DF:
14579 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
14580 exp, target, false);
14581 case ALTIVEC_BUILTIN_LVXL_V2DI:
14582 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
14583 exp, target, false);
14584 case ALTIVEC_BUILTIN_LVXL_V4SF:
14585 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
14586 exp, target, false);
14587 case ALTIVEC_BUILTIN_LVXL:
14588 case ALTIVEC_BUILTIN_LVXL_V4SI:
14589 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
14590 exp, target, false);
14591 case ALTIVEC_BUILTIN_LVXL_V8HI:
14592 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
14593 exp, target, false);
14594 case ALTIVEC_BUILTIN_LVXL_V16QI:
14595 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
14596 exp, target, false);
14597 case ALTIVEC_BUILTIN_LVX_V1TI:
14598 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v1ti,
14599 exp, target, false);
14600 case ALTIVEC_BUILTIN_LVX_V2DF:
14601 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
14602 exp, target, false);
14603 case ALTIVEC_BUILTIN_LVX_V2DI:
14604 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
14605 exp, target, false);
14606 case ALTIVEC_BUILTIN_LVX_V4SF:
14607 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
14608 exp, target, false);
14609 case ALTIVEC_BUILTIN_LVX:
14610 case ALTIVEC_BUILTIN_LVX_V4SI:
14611 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
14612 exp, target, false);
14613 case ALTIVEC_BUILTIN_LVX_V8HI:
14614 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
14615 exp, target, false);
14616 case ALTIVEC_BUILTIN_LVX_V16QI:
14617 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
14618 exp, target, false);
14619 case ALTIVEC_BUILTIN_LVLX:
14620 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
14621 exp, target, true);
14622 case ALTIVEC_BUILTIN_LVLXL:
14623 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
14624 exp, target, true);
14625 case ALTIVEC_BUILTIN_LVRX:
14626 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
14627 exp, target, true);
14628 case ALTIVEC_BUILTIN_LVRXL:
14629 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
14630 exp, target, true);
14631 case VSX_BUILTIN_LXVD2X_V1TI:
14632 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
14633 exp, target, false);
14634 case VSX_BUILTIN_LXVD2X_V2DF:
14635 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
14636 exp, target, false);
14637 case VSX_BUILTIN_LXVD2X_V2DI:
14638 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
14639 exp, target, false);
14640 case VSX_BUILTIN_LXVW4X_V4SF:
14641 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
14642 exp, target, false);
14643 case VSX_BUILTIN_LXVW4X_V4SI:
14644 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
14645 exp, target, false);
14646 case VSX_BUILTIN_LXVW4X_V8HI:
14647 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
14648 exp, target, false);
14649 case VSX_BUILTIN_LXVW4X_V16QI:
14650 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
14651 exp, target, false);
14652 /* For the following on big endian, it's ok to use any appropriate
14653 unaligned-supporting load, so use a generic expander. For
14654 little-endian, the exact element-reversing instruction must
14655 be used. */
14656 case VSX_BUILTIN_LD_ELEMREV_V2DF:
14657 {
14658 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
14659 : CODE_FOR_vsx_ld_elemrev_v2df);
14660 return altivec_expand_lv_builtin (code, exp, target, false);
14661 }
14662 case VSX_BUILTIN_LD_ELEMREV_V1TI:
14663 {
14664 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
14665 : CODE_FOR_vsx_ld_elemrev_v1ti);
14666 return altivec_expand_lv_builtin (code, exp, target, false);
14667 }
14668 case VSX_BUILTIN_LD_ELEMREV_V2DI:
14669 {
14670 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
14671 : CODE_FOR_vsx_ld_elemrev_v2di);
14672 return altivec_expand_lv_builtin (code, exp, target, false);
14673 }
14674 case VSX_BUILTIN_LD_ELEMREV_V4SF:
14675 {
14676 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
14677 : CODE_FOR_vsx_ld_elemrev_v4sf);
14678 return altivec_expand_lv_builtin (code, exp, target, false);
14679 }
14680 case VSX_BUILTIN_LD_ELEMREV_V4SI:
14681 {
14682 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
14683 : CODE_FOR_vsx_ld_elemrev_v4si);
14684 return altivec_expand_lv_builtin (code, exp, target, false);
14685 }
14686 case VSX_BUILTIN_LD_ELEMREV_V8HI:
14687 {
14688 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
14689 : CODE_FOR_vsx_ld_elemrev_v8hi);
14690 return altivec_expand_lv_builtin (code, exp, target, false);
14691 }
14692 case VSX_BUILTIN_LD_ELEMREV_V16QI:
14693 {
14694 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
14695 : CODE_FOR_vsx_ld_elemrev_v16qi);
14696 return altivec_expand_lv_builtin (code, exp, target, false);
14697 }
14698 break;
    default:
      /* Fall through to the code below the switch.  */
      break;
14702 }
14703
14704 *expandedp = false;
14705 return NULL_RTX;
14706 }
14707
14708 /* Check whether a builtin function is supported in this target
14709 configuration. */
14710 bool
14711 rs6000_builtin_is_supported_p (enum rs6000_builtins fncode)
14712 {
14713 HOST_WIDE_INT fnmask = rs6000_builtin_info[fncode].mask;
  return (fnmask & rs6000_builtin_mask) == fnmask;
14718 }
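
/* Worked example: a built-in whose mask is, say,
   RS6000_BTM_ALTIVEC | RS6000_BTM_VSX is reported as supported only when
   both bits are present in rs6000_builtin_mask; if either is missing,
   the masked value no longer equals fnmask.  */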
14719
14720 /* Raise an error message for a builtin function that is called without the
14721 appropriate target options being set. */
14722
14723 static void
14724 rs6000_invalid_builtin (enum rs6000_builtins fncode)
14725 {
14726 size_t uns_fncode = (size_t) fncode;
14727 const char *name = rs6000_builtin_info[uns_fncode].name;
14728 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
14729
14730 gcc_assert (name != NULL);
14731 if ((fnmask & RS6000_BTM_CELL) != 0)
14732 error ("builtin function %qs is only valid for the cell processor", name);
14733 else if ((fnmask & RS6000_BTM_VSX) != 0)
14734 error ("builtin function %qs requires the %qs option", name, "-mvsx");
14735 else if ((fnmask & RS6000_BTM_HTM) != 0)
14736 error ("builtin function %qs requires the %qs option", name, "-mhtm");
14737 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
14738 error ("builtin function %qs requires the %qs option", name, "-maltivec");
14739 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14740 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14741 error ("builtin function %qs requires the %qs and %qs options",
14742 name, "-mhard-dfp", "-mpower8-vector");
14743 else if ((fnmask & RS6000_BTM_DFP) != 0)
14744 error ("builtin function %qs requires the %qs option", name, "-mhard-dfp");
14745 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
14746 error ("builtin function %qs requires the %qs option", name,
14747 "-mpower8-vector");
14748 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
14749 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
14750 error ("builtin function %qs requires the %qs and %qs options",
14751 name, "-mcpu=power9", "-m64");
14752 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
14753 error ("builtin function %qs requires the %qs option", name,
14754 "-mcpu=power9");
14755 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
14756 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
14757 error ("builtin function %qs requires the %qs and %qs options",
14758 name, "-mcpu=power9", "-m64");
14759 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
14760 error ("builtin function %qs requires the %qs option", name,
14761 "-mcpu=power9");
14762 else if ((fnmask & RS6000_BTM_LDBL128) == RS6000_BTM_LDBL128)
14763 {
14764 if (!TARGET_HARD_FLOAT)
14765 error ("builtin function %qs requires the %qs option", name,
14766 "-mhard-float");
14767 else
14768 error ("builtin function %qs requires the %qs option", name,
14769 TARGET_IEEEQUAD ? "-mabi=ibmlongdouble" : "-mlong-double-128");
14770 }
14771 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
14772 error ("builtin function %qs requires the %qs option", name,
14773 "-mhard-float");
14774 else if ((fnmask & RS6000_BTM_FLOAT128_HW) != 0)
14775 error ("builtin function %qs requires ISA 3.0 IEEE 128-bit floating point",
14776 name);
14777 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
14778 error ("builtin function %qs requires the %qs option", name,
14779 "%<-mfloat128%>");
14780 else if ((fnmask & (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
14781 == (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
14782 error ("builtin function %qs requires the %qs (or newer), and "
14783 "%qs or %qs options",
14784 name, "-mcpu=power7", "-m64", "-mpowerpc64");
14785 else
14786 error ("builtin function %qs is not supported with the current options",
14787 name);
14788 }
14789
14790 /* Target hook for early folding of built-ins, shamelessly stolen
14791 from ia64.c. */
14792
14793 static tree
14794 rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
14795 int n_args ATTRIBUTE_UNUSED,
14796 tree *args ATTRIBUTE_UNUSED,
14797 bool ignore ATTRIBUTE_UNUSED)
14798 {
14799 #ifdef SUBTARGET_FOLD_BUILTIN
14800 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
14801 #else
14802 return NULL_TREE;
14803 #endif
14804 }
14805
/* Helper function to sort out which built-ins may be valid without having
   an LHS.  */
14808 static bool
14809 rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
14810 {
14811 switch (fn_code)
14812 {
14813 case ALTIVEC_BUILTIN_STVX_V16QI:
14814 case ALTIVEC_BUILTIN_STVX_V8HI:
14815 case ALTIVEC_BUILTIN_STVX_V4SI:
14816 case ALTIVEC_BUILTIN_STVX_V4SF:
14817 case ALTIVEC_BUILTIN_STVX_V2DI:
14818 case ALTIVEC_BUILTIN_STVX_V2DF:
14819 case VSX_BUILTIN_STXVW4X_V16QI:
14820 case VSX_BUILTIN_STXVW4X_V8HI:
14821 case VSX_BUILTIN_STXVW4X_V4SF:
14822 case VSX_BUILTIN_STXVW4X_V4SI:
14823 case VSX_BUILTIN_STXVD2X_V2DF:
14824 case VSX_BUILTIN_STXVD2X_V2DI:
14825 return true;
14826 default:
14827 return false;
14828 }
14829 }
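
/* The cases above are all stores, which exist purely for their side
   effects; e.g. a statement like

     __builtin_altivec_stvx_v4si (v, 0, p);   // sketch of a user call

   produces no value to assign, so folding must not require an LHS.  */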
14830
14831 /* Helper function to handle the gimple folding of a vector compare
14832 operation. This sets up true/false vectors, and uses the
14833 VEC_COND_EXPR operation.
14834 CODE indicates which comparison is to be made. (EQ, GT, ...).
14835 TYPE indicates the type of the result. */
14836 static tree
14837 fold_build_vec_cmp (tree_code code, tree type,
14838 tree arg0, tree arg1)
14839 {
14840 tree cmp_type = build_same_sized_truth_vector_type (type);
14841 tree zero_vec = build_zero_cst (type);
14842 tree minus_one_vec = build_minus_one_cst (type);
14843 tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
14844 return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
14845 }
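
/* Sketch of the GIMPLE this produces: for an equality compare on V4SImode
   operands the result is

     cmp = a == b;
     res = cmp ? {-1,-1,-1,-1} : {0,0,0,0};

   i.e. a VEC_COND_EXPR selecting between the all-ones and all-zeros
   vectors built above.  */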
14846
14847 /* Helper function to handle the in-between steps for the
14848 vector compare built-ins. */
14849 static void
14850 fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
14851 {
14852 tree arg0 = gimple_call_arg (stmt, 0);
14853 tree arg1 = gimple_call_arg (stmt, 1);
14854 tree lhs = gimple_call_lhs (stmt);
14855 tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1);
14856 gimple *g = gimple_build_assign (lhs, cmp);
14857 gimple_set_location (g, gimple_location (stmt));
14858 gsi_replace (gsi, g, true);
14859 }
14860
14861 /* Helper function to map V2DF and V4SF types to their
14862 integral equivalents (V2DI and V4SI). */
14863 tree map_to_integral_tree_type (tree input_tree_type)
14864 {
14865 if (INTEGRAL_TYPE_P (TREE_TYPE (input_tree_type)))
14866 return input_tree_type;
14867 else
14868 {
14869 if (types_compatible_p (TREE_TYPE (input_tree_type),
14870 TREE_TYPE (V2DF_type_node)))
14871 return V2DI_type_node;
14872 else if (types_compatible_p (TREE_TYPE (input_tree_type),
14873 TREE_TYPE (V4SF_type_node)))
14874 return V4SI_type_node;
14875 else
14876 gcc_unreachable ();
14877 }
14878 }
14879
/* Helper function to handle the vector merge[hl] built-ins.  The only
   implementation difference between the h and l versions is in the values
   used when building the permute vector for the high-word versus low-word
   merge.  The variance is keyed off the use_high parameter.  */
14884 static void
14885 fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
14886 {
14887 tree arg0 = gimple_call_arg (stmt, 0);
14888 tree arg1 = gimple_call_arg (stmt, 1);
14889 tree lhs = gimple_call_lhs (stmt);
14890 tree lhs_type = TREE_TYPE (lhs);
14891 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
14892 int midpoint = n_elts / 2;
14893 int offset = 0;
14894
14895 if (use_high == 1)
14896 offset = midpoint;
14897
14898 /* The permute_type will match the lhs for integral types. For double and
14899 float types, the permute type needs to map to the V2 or V4 type that
14900 matches size. */
14901 tree permute_type;
14902 permute_type = map_to_integral_tree_type (lhs_type);
14903 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
14904
14905 for (int i = 0; i < midpoint; i++)
14906 {
14907 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14908 offset + i));
14909 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14910 offset + n_elts + i));
14911 }
14912
14913 tree permute = elts.build ();
14914
14915 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
14916 gimple_set_location (g, gimple_location (stmt));
14917 gsi_replace (gsi, g, true);
14918 }
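
/* Worked example of the permute vector built above: for a 4-element
   vector, midpoint == 2.  With use_high == 1 (offset == 2) the loop
   pushes the selectors {2, 6, 3, 7}; with use_high == 0 it pushes
   {0, 4, 1, 5}.  Selectors below n_elts pick elements of arg0, the
   rest pick from arg1.  */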
14919
14920 /* Helper function to handle the vector merge[eo] built-ins. */
14921 static void
14922 fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_odd)
14923 {
14924 tree arg0 = gimple_call_arg (stmt, 0);
14925 tree arg1 = gimple_call_arg (stmt, 1);
14926 tree lhs = gimple_call_lhs (stmt);
14927 tree lhs_type = TREE_TYPE (lhs);
14928 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
14929
14930 /* The permute_type will match the lhs for integral types. For double and
14931 float types, the permute type needs to map to the V2 or V4 type that
14932 matches size. */
14933 tree permute_type;
14934 permute_type = map_to_integral_tree_type (lhs_type);
14935
14936 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
14937
14938 /* Build the permute vector. */
14939 for (int i = 0; i < n_elts / 2; i++)
14940 {
14941 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14942 2*i + use_odd));
14943 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14944 2*i + use_odd + n_elts));
14945 }
14946
14947 tree permute = elts.build ();
14948
14949 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
14950 gimple_set_location (g, gimple_location (stmt));
14951 gsi_replace (gsi, g, true);
14952 }
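
/* Worked example: for a 4-element vector the loop above pushes the
   selectors {0, 4, 2, 6} when use_odd == 0 (the even-element merge)
   and {1, 5, 3, 7} when use_odd == 1 (the odd-element merge).  */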
14953
14954 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
14955 a constant, use rs6000_fold_builtin.) */
14956
14957 bool
14958 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
14959 {
14960 gimple *stmt = gsi_stmt (*gsi);
14961 tree fndecl = gimple_call_fndecl (stmt);
14962 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
14963 enum rs6000_builtins fn_code
14964 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14965 tree arg0, arg1, lhs, temp;
14966 enum tree_code bcode;
14967 gimple *g;
14968
14969 size_t uns_fncode = (size_t) fn_code;
14970 enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
14971 const char *fn_name1 = rs6000_builtin_info[uns_fncode].name;
14972 const char *fn_name2 = (icode != CODE_FOR_nothing)
14973 ? get_insn_name ((int) icode)
14974 : "nothing";
14975
14976 if (TARGET_DEBUG_BUILTIN)
14977 fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
14978 fn_code, fn_name1, fn_name2);
14979
14980 if (!rs6000_fold_gimple)
14981 return false;
14982
  /* Prevent gimple folding for code that does not have an LHS, unless it is
     allowed per the rs6000_builtin_valid_without_lhs helper function.  */
14985 if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
14986 return false;
14987
  /* Don't fold invalid builtins; let rs6000_expand_builtin diagnose them.  */
14989 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fncode].mask;
14990 bool func_valid_p = (rs6000_builtin_mask & mask) == mask;
14991 if (!func_valid_p)
14992 return false;
14993
14994 switch (fn_code)
14995 {
14996 /* Flavors of vec_add. We deliberately don't expand
14997 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
14998 TImode, resulting in much poorer code generation. */
14999 case ALTIVEC_BUILTIN_VADDUBM:
15000 case ALTIVEC_BUILTIN_VADDUHM:
15001 case ALTIVEC_BUILTIN_VADDUWM:
15002 case P8V_BUILTIN_VADDUDM:
15003 case ALTIVEC_BUILTIN_VADDFP:
15004 case VSX_BUILTIN_XVADDDP:
15005 bcode = PLUS_EXPR;
15006 do_binary:
15007 arg0 = gimple_call_arg (stmt, 0);
15008 arg1 = gimple_call_arg (stmt, 1);
15009 lhs = gimple_call_lhs (stmt);
15010 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (lhs)))
15011 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (lhs))))
15012 {
          /* Ensure the binary operation is performed in a type
             that wraps if it is an integral type.  */
15015 gimple_seq stmts = NULL;
15016 tree type = unsigned_type_for (TREE_TYPE (lhs));
15017 tree uarg0 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15018 type, arg0);
15019 tree uarg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15020 type, arg1);
15021 tree res = gimple_build (&stmts, gimple_location (stmt), bcode,
15022 type, uarg0, uarg1);
15023 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15024 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR,
15025 build1 (VIEW_CONVERT_EXPR,
15026 TREE_TYPE (lhs), res));
15027 gsi_replace (gsi, g, true);
15028 return true;
15029 }
15030 g = gimple_build_assign (lhs, bcode, arg0, arg1);
15031 gimple_set_location (g, gimple_location (stmt));
15032 gsi_replace (gsi, g, true);
15033 return true;
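    /* Sketch of the wrapping path above: for a vector signed int
       addition the folded GIMPLE is roughly

         u0  = VIEW_CONVERT_EXPR<vector unsigned int>(arg0);
         u1  = VIEW_CONVERT_EXPR<vector unsigned int>(arg1);
         res = u0 + u1;
         lhs = VIEW_CONVERT_EXPR<vector signed int>(res);

       so the addition happens in a wrapping unsigned type and cannot
       introduce signed-overflow UB that the vadduwm hardware semantics
       never had.  */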
15034 /* Flavors of vec_sub. We deliberately don't expand
15035 P8V_BUILTIN_VSUBUQM. */
15036 case ALTIVEC_BUILTIN_VSUBUBM:
15037 case ALTIVEC_BUILTIN_VSUBUHM:
15038 case ALTIVEC_BUILTIN_VSUBUWM:
15039 case P8V_BUILTIN_VSUBUDM:
15040 case ALTIVEC_BUILTIN_VSUBFP:
15041 case VSX_BUILTIN_XVSUBDP:
15042 bcode = MINUS_EXPR;
15043 goto do_binary;
15044 case VSX_BUILTIN_XVMULSP:
15045 case VSX_BUILTIN_XVMULDP:
15046 arg0 = gimple_call_arg (stmt, 0);
15047 arg1 = gimple_call_arg (stmt, 1);
15048 lhs = gimple_call_lhs (stmt);
15049 g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
15050 gimple_set_location (g, gimple_location (stmt));
15051 gsi_replace (gsi, g, true);
15052 return true;
15053 /* Even element flavors of vec_mul (signed). */
15054 case ALTIVEC_BUILTIN_VMULESB:
15055 case ALTIVEC_BUILTIN_VMULESH:
15056 case P8V_BUILTIN_VMULESW:
15057 /* Even element flavors of vec_mul (unsigned). */
15058 case ALTIVEC_BUILTIN_VMULEUB:
15059 case ALTIVEC_BUILTIN_VMULEUH:
15060 case P8V_BUILTIN_VMULEUW:
15061 arg0 = gimple_call_arg (stmt, 0);
15062 arg1 = gimple_call_arg (stmt, 1);
15063 lhs = gimple_call_lhs (stmt);
15064 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
15065 gimple_set_location (g, gimple_location (stmt));
15066 gsi_replace (gsi, g, true);
15067 return true;
15068 /* Odd element flavors of vec_mul (signed). */
15069 case ALTIVEC_BUILTIN_VMULOSB:
15070 case ALTIVEC_BUILTIN_VMULOSH:
15071 case P8V_BUILTIN_VMULOSW:
15072 /* Odd element flavors of vec_mul (unsigned). */
15073 case ALTIVEC_BUILTIN_VMULOUB:
15074 case ALTIVEC_BUILTIN_VMULOUH:
15075 case P8V_BUILTIN_VMULOUW:
15076 arg0 = gimple_call_arg (stmt, 0);
15077 arg1 = gimple_call_arg (stmt, 1);
15078 lhs = gimple_call_lhs (stmt);
15079 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
15080 gimple_set_location (g, gimple_location (stmt));
15081 gsi_replace (gsi, g, true);
15082 return true;
15083 /* Flavors of vec_div (Integer). */
15084 case VSX_BUILTIN_DIV_V2DI:
15085 case VSX_BUILTIN_UDIV_V2DI:
15086 arg0 = gimple_call_arg (stmt, 0);
15087 arg1 = gimple_call_arg (stmt, 1);
15088 lhs = gimple_call_lhs (stmt);
15089 g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
15090 gimple_set_location (g, gimple_location (stmt));
15091 gsi_replace (gsi, g, true);
15092 return true;
15093 /* Flavors of vec_div (Float). */
15094 case VSX_BUILTIN_XVDIVSP:
15095 case VSX_BUILTIN_XVDIVDP:
15096 arg0 = gimple_call_arg (stmt, 0);
15097 arg1 = gimple_call_arg (stmt, 1);
15098 lhs = gimple_call_lhs (stmt);
15099 g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
15100 gimple_set_location (g, gimple_location (stmt));
15101 gsi_replace (gsi, g, true);
15102 return true;
15103 /* Flavors of vec_and. */
15104 case ALTIVEC_BUILTIN_VAND:
15105 arg0 = gimple_call_arg (stmt, 0);
15106 arg1 = gimple_call_arg (stmt, 1);
15107 lhs = gimple_call_lhs (stmt);
15108 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
15109 gimple_set_location (g, gimple_location (stmt));
15110 gsi_replace (gsi, g, true);
15111 return true;
15112 /* Flavors of vec_andc. */
15113 case ALTIVEC_BUILTIN_VANDC:
15114 arg0 = gimple_call_arg (stmt, 0);
15115 arg1 = gimple_call_arg (stmt, 1);
15116 lhs = gimple_call_lhs (stmt);
15117 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15118 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15119 gimple_set_location (g, gimple_location (stmt));
15120 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15121 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
15122 gimple_set_location (g, gimple_location (stmt));
15123 gsi_replace (gsi, g, true);
15124 return true;
15125 /* Flavors of vec_nand. */
15126 case P8V_BUILTIN_VEC_NAND:
15127 case P8V_BUILTIN_NAND_V16QI:
15128 case P8V_BUILTIN_NAND_V8HI:
15129 case P8V_BUILTIN_NAND_V4SI:
15130 case P8V_BUILTIN_NAND_V4SF:
15131 case P8V_BUILTIN_NAND_V2DF:
15132 case P8V_BUILTIN_NAND_V2DI:
15133 arg0 = gimple_call_arg (stmt, 0);
15134 arg1 = gimple_call_arg (stmt, 1);
15135 lhs = gimple_call_lhs (stmt);
15136 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15137 g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
15138 gimple_set_location (g, gimple_location (stmt));
15139 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15140 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15141 gimple_set_location (g, gimple_location (stmt));
15142 gsi_replace (gsi, g, true);
15143 return true;
15144 /* Flavors of vec_or. */
15145 case ALTIVEC_BUILTIN_VOR:
15146 arg0 = gimple_call_arg (stmt, 0);
15147 arg1 = gimple_call_arg (stmt, 1);
15148 lhs = gimple_call_lhs (stmt);
15149 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
15150 gimple_set_location (g, gimple_location (stmt));
15151 gsi_replace (gsi, g, true);
15152 return true;
    /* Flavors of vec_orc.  */
15154 case P8V_BUILTIN_ORC_V16QI:
15155 case P8V_BUILTIN_ORC_V8HI:
15156 case P8V_BUILTIN_ORC_V4SI:
15157 case P8V_BUILTIN_ORC_V4SF:
15158 case P8V_BUILTIN_ORC_V2DF:
15159 case P8V_BUILTIN_ORC_V2DI:
15160 arg0 = gimple_call_arg (stmt, 0);
15161 arg1 = gimple_call_arg (stmt, 1);
15162 lhs = gimple_call_lhs (stmt);
15163 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15164 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15165 gimple_set_location (g, gimple_location (stmt));
15166 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15167 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
15168 gimple_set_location (g, gimple_location (stmt));
15169 gsi_replace (gsi, g, true);
15170 return true;
15171 /* Flavors of vec_xor. */
15172 case ALTIVEC_BUILTIN_VXOR:
15173 arg0 = gimple_call_arg (stmt, 0);
15174 arg1 = gimple_call_arg (stmt, 1);
15175 lhs = gimple_call_lhs (stmt);
15176 g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
15177 gimple_set_location (g, gimple_location (stmt));
15178 gsi_replace (gsi, g, true);
15179 return true;
15180 /* Flavors of vec_nor. */
15181 case ALTIVEC_BUILTIN_VNOR:
15182 arg0 = gimple_call_arg (stmt, 0);
15183 arg1 = gimple_call_arg (stmt, 1);
15184 lhs = gimple_call_lhs (stmt);
15185 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15186 g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
15187 gimple_set_location (g, gimple_location (stmt));
15188 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15189 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15190 gimple_set_location (g, gimple_location (stmt));
15191 gsi_replace (gsi, g, true);
15192 return true;
    /* Flavors of vec_abs.  */
15194 case ALTIVEC_BUILTIN_ABS_V16QI:
15195 case ALTIVEC_BUILTIN_ABS_V8HI:
15196 case ALTIVEC_BUILTIN_ABS_V4SI:
15197 case ALTIVEC_BUILTIN_ABS_V4SF:
15198 case P8V_BUILTIN_ABS_V2DI:
15199 case VSX_BUILTIN_XVABSDP:
15200 arg0 = gimple_call_arg (stmt, 0);
15201 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
15202 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
15203 return false;
15204 lhs = gimple_call_lhs (stmt);
15205 g = gimple_build_assign (lhs, ABS_EXPR, arg0);
15206 gimple_set_location (g, gimple_location (stmt));
15207 gsi_replace (gsi, g, true);
15208 return true;
    /* Flavors of vec_min.  */
15210 case VSX_BUILTIN_XVMINDP:
15211 case P8V_BUILTIN_VMINSD:
15212 case P8V_BUILTIN_VMINUD:
15213 case ALTIVEC_BUILTIN_VMINSB:
15214 case ALTIVEC_BUILTIN_VMINSH:
15215 case ALTIVEC_BUILTIN_VMINSW:
15216 case ALTIVEC_BUILTIN_VMINUB:
15217 case ALTIVEC_BUILTIN_VMINUH:
15218 case ALTIVEC_BUILTIN_VMINUW:
15219 case ALTIVEC_BUILTIN_VMINFP:
15220 arg0 = gimple_call_arg (stmt, 0);
15221 arg1 = gimple_call_arg (stmt, 1);
15222 lhs = gimple_call_lhs (stmt);
15223 g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
15224 gimple_set_location (g, gimple_location (stmt));
15225 gsi_replace (gsi, g, true);
15226 return true;
    /* Flavors of vec_max.  */
15228 case VSX_BUILTIN_XVMAXDP:
15229 case P8V_BUILTIN_VMAXSD:
15230 case P8V_BUILTIN_VMAXUD:
15231 case ALTIVEC_BUILTIN_VMAXSB:
15232 case ALTIVEC_BUILTIN_VMAXSH:
15233 case ALTIVEC_BUILTIN_VMAXSW:
15234 case ALTIVEC_BUILTIN_VMAXUB:
15235 case ALTIVEC_BUILTIN_VMAXUH:
15236 case ALTIVEC_BUILTIN_VMAXUW:
15237 case ALTIVEC_BUILTIN_VMAXFP:
15238 arg0 = gimple_call_arg (stmt, 0);
15239 arg1 = gimple_call_arg (stmt, 1);
15240 lhs = gimple_call_lhs (stmt);
15241 g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
15242 gimple_set_location (g, gimple_location (stmt));
15243 gsi_replace (gsi, g, true);
15244 return true;
15245 /* Flavors of vec_eqv. */
15246 case P8V_BUILTIN_EQV_V16QI:
15247 case P8V_BUILTIN_EQV_V8HI:
15248 case P8V_BUILTIN_EQV_V4SI:
15249 case P8V_BUILTIN_EQV_V4SF:
15250 case P8V_BUILTIN_EQV_V2DF:
15251 case P8V_BUILTIN_EQV_V2DI:
15252 arg0 = gimple_call_arg (stmt, 0);
15253 arg1 = gimple_call_arg (stmt, 1);
15254 lhs = gimple_call_lhs (stmt);
15255 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15256 g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
15257 gimple_set_location (g, gimple_location (stmt));
15258 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15259 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15260 gimple_set_location (g, gimple_location (stmt));
15261 gsi_replace (gsi, g, true);
15262 return true;
15263 /* Flavors of vec_rotate_left. */
15264 case ALTIVEC_BUILTIN_VRLB:
15265 case ALTIVEC_BUILTIN_VRLH:
15266 case ALTIVEC_BUILTIN_VRLW:
15267 case P8V_BUILTIN_VRLD:
15268 arg0 = gimple_call_arg (stmt, 0);
15269 arg1 = gimple_call_arg (stmt, 1);
15270 lhs = gimple_call_lhs (stmt);
15271 g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
15272 gimple_set_location (g, gimple_location (stmt));
15273 gsi_replace (gsi, g, true);
15274 return true;
15275 /* Flavors of vector shift right algebraic.
15276 vec_sra{b,h,w} -> vsra{b,h,w}. */
15277 case ALTIVEC_BUILTIN_VSRAB:
15278 case ALTIVEC_BUILTIN_VSRAH:
15279 case ALTIVEC_BUILTIN_VSRAW:
15280 case P8V_BUILTIN_VSRAD:
15281 {
15282 arg0 = gimple_call_arg (stmt, 0);
15283 arg1 = gimple_call_arg (stmt, 1);
15284 lhs = gimple_call_lhs (stmt);
15285 tree arg1_type = TREE_TYPE (arg1);
15286 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15287 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15288 location_t loc = gimple_location (stmt);
        /* Force arg1 into the valid range for the arg0 type.  */
15290 /* Build a vector consisting of the max valid bit-size values. */
15291 int n_elts = VECTOR_CST_NELTS (arg1);
15292 tree element_size = build_int_cst (unsigned_element_type,
15293 128 / n_elts);
15294 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
15295 for (int i = 0; i < n_elts; i++)
15296 elts.safe_push (element_size);
15297 tree modulo_tree = elts.build ();
15298 /* Modulo the provided shift value against that vector. */
15299 gimple_seq stmts = NULL;
15300 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15301 unsigned_arg1_type, arg1);
15302 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15303 unsigned_arg1_type, unsigned_arg1,
15304 modulo_tree);
15305 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15306 /* And finally, do the shift. */
15307 g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, new_arg1);
15308 gimple_set_location (g, loc);
15309 gsi_replace (gsi, g, true);
15310 return true;
15311 }
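    /* Concrete instance of the modulo step above: for vec_sra on V4SImode,
       n_elts == 4, so element_size is 128 / 4 == 32 and each shift count
       is reduced mod 32 before the RSHIFT_EXPR, matching the hardware's
       use of only the low-order bits of each element's shift count.  */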
15312 /* Flavors of vector shift left.
15313 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
15314 case ALTIVEC_BUILTIN_VSLB:
15315 case ALTIVEC_BUILTIN_VSLH:
15316 case ALTIVEC_BUILTIN_VSLW:
15317 case P8V_BUILTIN_VSLD:
15318 {
15319 location_t loc;
15320 gimple_seq stmts = NULL;
15321 arg0 = gimple_call_arg (stmt, 0);
15322 tree arg0_type = TREE_TYPE (arg0);
15323 if (INTEGRAL_TYPE_P (TREE_TYPE (arg0_type))
15324 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0_type)))
15325 return false;
15326 arg1 = gimple_call_arg (stmt, 1);
15327 tree arg1_type = TREE_TYPE (arg1);
15328 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15329 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15330 loc = gimple_location (stmt);
15331 lhs = gimple_call_lhs (stmt);
        /* Force arg1 into the valid range for the arg0 type.  */
15333 /* Build a vector consisting of the max valid bit-size values. */
15334 int n_elts = VECTOR_CST_NELTS (arg1);
15335 int tree_size_in_bits = TREE_INT_CST_LOW (size_in_bytes (arg1_type))
15336 * BITS_PER_UNIT;
15337 tree element_size = build_int_cst (unsigned_element_type,
15338 tree_size_in_bits / n_elts);
15339 tree_vector_builder elts (unsigned_type_for (arg1_type), n_elts, 1);
15340 for (int i = 0; i < n_elts; i++)
15341 elts.safe_push (element_size);
15342 tree modulo_tree = elts.build ();
15343 /* Modulo the provided shift value against that vector. */
15344 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15345 unsigned_arg1_type, arg1);
15346 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15347 unsigned_arg1_type, unsigned_arg1,
15348 modulo_tree);
15349 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15350 /* And finally, do the shift. */
15351 g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, new_arg1);
15352 gimple_set_location (g, gimple_location (stmt));
15353 gsi_replace (gsi, g, true);
15354 return true;
15355 }
15356 /* Flavors of vector shift right. */
15357 case ALTIVEC_BUILTIN_VSRB:
15358 case ALTIVEC_BUILTIN_VSRH:
15359 case ALTIVEC_BUILTIN_VSRW:
15360 case P8V_BUILTIN_VSRD:
15361 {
15362 arg0 = gimple_call_arg (stmt, 0);
15363 arg1 = gimple_call_arg (stmt, 1);
15364 lhs = gimple_call_lhs (stmt);
15365 tree arg1_type = TREE_TYPE (arg1);
15366 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15367 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15368 location_t loc = gimple_location (stmt);
15369 gimple_seq stmts = NULL;
15370 /* Convert arg0 to unsigned. */
15371 tree arg0_unsigned
15372 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15373 unsigned_type_for (TREE_TYPE (arg0)), arg0);
        /* Force arg1 into the valid range for the arg0 type.  */
15375 /* Build a vector consisting of the max valid bit-size values. */
15376 int n_elts = VECTOR_CST_NELTS (arg1);
15377 tree element_size = build_int_cst (unsigned_element_type,
15378 128 / n_elts);
15379 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
15380 for (int i = 0; i < n_elts; i++)
15381 elts.safe_push (element_size);
15382 tree modulo_tree = elts.build ();
15383 /* Modulo the provided shift value against that vector. */
15384 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15385 unsigned_arg1_type, arg1);
15386 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15387 unsigned_arg1_type, unsigned_arg1,
15388 modulo_tree);
15389 /* Do the shift. */
15390 tree res
15391 = gimple_build (&stmts, RSHIFT_EXPR,
15392 TREE_TYPE (arg0_unsigned), arg0_unsigned, new_arg1);
15393 /* Convert result back to the lhs type. */
15394 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
15395 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15396 update_call_from_tree (gsi, res);
15397 return true;
15398 }
15399 /* Vector loads. */
15400 case ALTIVEC_BUILTIN_LVX_V16QI:
15401 case ALTIVEC_BUILTIN_LVX_V8HI:
15402 case ALTIVEC_BUILTIN_LVX_V4SI:
15403 case ALTIVEC_BUILTIN_LVX_V4SF:
15404 case ALTIVEC_BUILTIN_LVX_V2DI:
15405 case ALTIVEC_BUILTIN_LVX_V2DF:
15406 case ALTIVEC_BUILTIN_LVX_V1TI:
15407 {
15408 arg0 = gimple_call_arg (stmt, 0); // offset
15409 arg1 = gimple_call_arg (stmt, 1); // address
15410 lhs = gimple_call_lhs (stmt);
15411 location_t loc = gimple_location (stmt);
15412 /* Since arg1 may be cast to a different type, just use ptr_type_node
15413 here instead of trying to enforce TBAA on pointer types. */
15414 tree arg1_type = ptr_type_node;
15415 tree lhs_type = TREE_TYPE (lhs);
15416 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15417 the tree using the value from arg0. The resulting type will match
15418 the type of arg1. */
15419 gimple_seq stmts = NULL;
15420 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15421 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15422 arg1_type, arg1, temp_offset);
15423 /* Mask off any lower bits from the address. */
15424 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15425 arg1_type, temp_addr,
15426 build_int_cst (arg1_type, -16));
15427 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15428 if (!is_gimple_mem_ref_addr (aligned_addr))
15429 {
15430 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
15431 gimple *g = gimple_build_assign (t, aligned_addr);
15432 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15433 aligned_addr = t;
15434 }
15435 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15436 take an offset, but since we've already incorporated the offset
15437 above, here we just pass in a zero. */
15438 gimple *g
15439 = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
15440 build_int_cst (arg1_type, 0)));
15441 gimple_set_location (g, loc);
15442 gsi_replace (gsi, g, true);
15443 return true;
15444 }
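/* Usage sketch, illustrative only: lvx ignores the low four address
   bits, so for

     vector signed int x = vec_ld (3, ptr);

   the fold computes (ptr + 3) & -16 and loads the 16-byte block
   containing that address, exactly as the hardware lvx would.  */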
15445 /* Vector stores. */
15446 case ALTIVEC_BUILTIN_STVX_V16QI:
15447 case ALTIVEC_BUILTIN_STVX_V8HI:
15448 case ALTIVEC_BUILTIN_STVX_V4SI:
15449 case ALTIVEC_BUILTIN_STVX_V4SF:
15450 case ALTIVEC_BUILTIN_STVX_V2DI:
15451 case ALTIVEC_BUILTIN_STVX_V2DF:
15452 {
15453 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15454 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15455 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15456 location_t loc = gimple_location (stmt);
15457 tree arg0_type = TREE_TYPE (arg0);
15458 /* Use ptr_type_node (no TBAA) for the arg2_type.
15459 FIXME: (Richard) "A proper fix would be to transition this type as
15460 seen from the frontend to GIMPLE, for example in a similar way we
15461 do for MEM_REFs by piggy-backing that on an extra argument, a
15462 constant zero pointer of the alias pointer type to use (which would
15463 also serve as a type indicator of the store itself). I'd use a
15464 target specific internal function for this (not sure if we can have
15465 those target specific, but I guess if it's folded away then that's
15466 fine) and get away with the overload set." */
15467 tree arg2_type = ptr_type_node;
15468 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15469 the tree using the value from arg0. The resulting type will match
15470 the type of arg2. */
15471 gimple_seq stmts = NULL;
15472 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15473 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15474 arg2_type, arg2, temp_offset);
15475 /* Mask off any lower bits from the address. */
15476 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15477 arg2_type, temp_addr,
15478 build_int_cst (arg2_type, -16));
15479 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15480 if (!is_gimple_mem_ref_addr (aligned_addr))
15481 {
15482 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
15483 gimple *g = gimple_build_assign (t, aligned_addr);
15484 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15485 aligned_addr = t;
15486 }
15487 /* The desired gimple result should be similar to:
15488 MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */
15489 gimple *g
15490 = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
15491 build_int_cst (arg2_type, 0)), arg0);
15492 gimple_set_location (g, loc);
15493 gsi_replace (gsi, g, true);
15494 return true;
15495 }
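/* Illustrative counterpart for the store side: vec_st (v, 0, ptr)
   becomes a single aligned MEM_REF assignment, conceptually

     *(vector int *) ((uintptr_t) ptr & -16) = v;

   so the value always lands on a 16-byte boundary, matching stvx.  */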
15496
15497 /* Unaligned vector loads. */
15498 case VSX_BUILTIN_LXVW4X_V16QI:
15499 case VSX_BUILTIN_LXVW4X_V8HI:
15500 case VSX_BUILTIN_LXVW4X_V4SF:
15501 case VSX_BUILTIN_LXVW4X_V4SI:
15502 case VSX_BUILTIN_LXVD2X_V2DF:
15503 case VSX_BUILTIN_LXVD2X_V2DI:
15504 {
15505 arg0 = gimple_call_arg (stmt, 0); // offset
15506 arg1 = gimple_call_arg (stmt, 1); // address
15507 lhs = gimple_call_lhs (stmt);
15508 location_t loc = gimple_location (stmt);
15509 /* Since arg1 may be cast to a different type, just use ptr_type_node
15510 here instead of trying to enforce TBAA on pointer types. */
15511 tree arg1_type = ptr_type_node;
15512 tree lhs_type = TREE_TYPE (lhs);
15513 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
15514 required alignment (power) is 4 bytes regardless of data type. */
15515 tree align_ltype = build_aligned_type (lhs_type, 4);
15516 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15517 the tree using the value from arg0. The resulting type will match
15518 the type of arg1. */
15519 gimple_seq stmts = NULL;
15520 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15521 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15522 arg1_type, arg1, temp_offset);
15523 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15524 if (!is_gimple_mem_ref_addr (temp_addr))
15525 {
15526 tree t = make_ssa_name (TREE_TYPE (temp_addr));
15527 gimple *g = gimple_build_assign (t, temp_addr);
15528 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15529 temp_addr = t;
15530 }
15531 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15532 take an offset, but since we've already incorporated the offset
15533 above, here we just pass in a zero. */
15534 gimple *g;
15535 g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
15536 build_int_cst (arg1_type, 0)));
15537 gimple_set_location (g, loc);
15538 gsi_replace (gsi, g, true);
15539 return true;
15540 }
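/* Sketch, not from the original source: unlike lvx above, lxvd2x and
   lxvw4x do not mask the address, so the fold keeps the byte-exact
   address and instead lowers the alignment of the access type to 4,
   conceptually

     typedef vector double v2df_a4 __attribute__ ((aligned (4)));
     lhs = *(v2df_a4 *) (addr + offset);

   so later passes know the access may be unaligned.  */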
15541
15542 /* Unaligned vector stores. */
15543 case VSX_BUILTIN_STXVW4X_V16QI:
15544 case VSX_BUILTIN_STXVW4X_V8HI:
15545 case VSX_BUILTIN_STXVW4X_V4SF:
15546 case VSX_BUILTIN_STXVW4X_V4SI:
15547 case VSX_BUILTIN_STXVD2X_V2DF:
15548 case VSX_BUILTIN_STXVD2X_V2DI:
15549 {
15550 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15551 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15552 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15553 location_t loc = gimple_location (stmt);
15554 tree arg0_type = TREE_TYPE (arg0);
15555 /* Use ptr_type_node (no TBAA) for the arg2_type. */
15556 tree arg2_type = ptr_type_node;
15557 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
15558 required alignment (power) is 4 bytes regardless of data type. */
15559 tree align_stype = build_aligned_type (arg0_type, 4);
15560 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15561 the tree using the value from arg1. */
15562 gimple_seq stmts = NULL;
15563 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15564 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15565 arg2_type, arg2, temp_offset);
15566 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15567 if (!is_gimple_mem_ref_addr (temp_addr))
15568 {
15569 tree t = make_ssa_name (TREE_TYPE (temp_addr));
15570 gimple *g = gimple_build_assign (t, temp_addr);
15571 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15572 temp_addr = t;
15573 }
15574 gimple *g;
15575 g = gimple_build_assign (build2 (MEM_REF, align_stype, temp_addr,
15576 build_int_cst (arg2_type, 0)), arg0);
15577 gimple_set_location (g, loc);
15578 gsi_replace (gsi, g, true);
15579 return true;
15580 }
15581
15582 /* Vector Fused multiply-add (fma). */
15583 case ALTIVEC_BUILTIN_VMADDFP:
15584 case VSX_BUILTIN_XVMADDDP:
15585 case ALTIVEC_BUILTIN_VMLADDUHM:
15586 {
15587 arg0 = gimple_call_arg (stmt, 0);
15588 arg1 = gimple_call_arg (stmt, 1);
15589 tree arg2 = gimple_call_arg (stmt, 2);
15590 lhs = gimple_call_lhs (stmt);
15591 gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2);
15592 gimple_call_set_lhs (g, lhs);
15593 gimple_call_set_nothrow (g, true);
15594 gimple_set_location (g, gimple_location (stmt));
15595 gsi_replace (gsi, g, true);
15596 return true;
15597 }
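/* Illustration, added: all three builtins map onto the generic fused
   multiply-add internal function, so

     vector float r = vec_madd (a, b, c);

   simply becomes r = .FMA (a, b, c) in the GIMPLE dumps, letting the
   middle end treat it like any other fma.  VMLADDUHM is an integer
   multiply-low-and-add, which the same internal function describes
   exactly since integer fma has no rounding step.  */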
15598
15599 /* Vector compares; EQ, NE, GE, GT, LE. */
15600 case ALTIVEC_BUILTIN_VCMPEQUB:
15601 case ALTIVEC_BUILTIN_VCMPEQUH:
15602 case ALTIVEC_BUILTIN_VCMPEQUW:
15603 case P8V_BUILTIN_VCMPEQUD:
15604 fold_compare_helper (gsi, EQ_EXPR, stmt);
15605 return true;
15606
15607 case P9V_BUILTIN_CMPNEB:
15608 case P9V_BUILTIN_CMPNEH:
15609 case P9V_BUILTIN_CMPNEW:
15610 fold_compare_helper (gsi, NE_EXPR, stmt);
15611 return true;
15612
15613 case VSX_BUILTIN_CMPGE_16QI:
15614 case VSX_BUILTIN_CMPGE_U16QI:
15615 case VSX_BUILTIN_CMPGE_8HI:
15616 case VSX_BUILTIN_CMPGE_U8HI:
15617 case VSX_BUILTIN_CMPGE_4SI:
15618 case VSX_BUILTIN_CMPGE_U4SI:
15619 case VSX_BUILTIN_CMPGE_2DI:
15620 case VSX_BUILTIN_CMPGE_U2DI:
15621 fold_compare_helper (gsi, GE_EXPR, stmt);
15622 return true;
15623
15624 case ALTIVEC_BUILTIN_VCMPGTSB:
15625 case ALTIVEC_BUILTIN_VCMPGTUB:
15626 case ALTIVEC_BUILTIN_VCMPGTSH:
15627 case ALTIVEC_BUILTIN_VCMPGTUH:
15628 case ALTIVEC_BUILTIN_VCMPGTSW:
15629 case ALTIVEC_BUILTIN_VCMPGTUW:
15630 case P8V_BUILTIN_VCMPGTUD:
15631 case P8V_BUILTIN_VCMPGTSD:
15632 fold_compare_helper (gsi, GT_EXPR, stmt);
15633 return true;
15634
15635 case VSX_BUILTIN_CMPLE_16QI:
15636 case VSX_BUILTIN_CMPLE_U16QI:
15637 case VSX_BUILTIN_CMPLE_8HI:
15638 case VSX_BUILTIN_CMPLE_U8HI:
15639 case VSX_BUILTIN_CMPLE_4SI:
15640 case VSX_BUILTIN_CMPLE_U4SI:
15641 case VSX_BUILTIN_CMPLE_2DI:
15642 case VSX_BUILTIN_CMPLE_U2DI:
15643 fold_compare_helper (gsi, LE_EXPR, stmt);
15644 return true;
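/* Example of the comparison folds, illustrative: fold_compare_helper
   rewrites each call into a vector comparison whose lanes are all-ones
   on success and zero otherwise, so

     vector bool int m = vec_cmpeq (a, b);

   behaves per element roughly like m[i] = (a[i] == b[i]) ? -1 : 0,
   and likewise for the NE/GE/GT/LE cases around this one.  */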
15645
15646 /* Flavors of vec_splat_[us]{8,16,32}. */
15647 case ALTIVEC_BUILTIN_VSPLTISB:
15648 case ALTIVEC_BUILTIN_VSPLTISH:
15649 case ALTIVEC_BUILTIN_VSPLTISW:
15650 {
15651 arg0 = gimple_call_arg (stmt, 0);
15652 lhs = gimple_call_lhs (stmt);
15653
15654 /* Only fold the vec_splat_*() if arg0 is an integer constant that fits
15655 in a 5-bit signed field, i.e. is in the range -16 to +15. */
15656 if (TREE_CODE (arg0) != INTEGER_CST
15657 || !IN_RANGE (TREE_INT_CST_LOW (arg0), -16, 15))
15658 return false;
15659 gimple_seq stmts = NULL;
15660 location_t loc = gimple_location (stmt);
15661 tree splat_value = gimple_convert (&stmts, loc,
15662 TREE_TYPE (TREE_TYPE (lhs)), arg0);
15663 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15664 tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
15665 g = gimple_build_assign (lhs, splat_tree);
15666 gimple_set_location (g, gimple_location (stmt));
15667 gsi_replace (gsi, g, true);
15668 return true;
15669 }
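/* Usage sketch, illustrative: vec_splat_s32 (7) folds to the constant
   vector { 7, 7, 7, 7 }, while an argument such as 99 fails the range
   check above because it cannot be encoded in the 5-bit immediate
   field of vspltisw, and the call is left for the expander.  */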
15670
15671 /* Flavors of vec_splat. */
15672 /* a = vec_splat (b, 0x3) becomes a = { b[3], b[3], b[3], ... }; */
15673 case ALTIVEC_BUILTIN_VSPLTB:
15674 case ALTIVEC_BUILTIN_VSPLTH:
15675 case ALTIVEC_BUILTIN_VSPLTW:
15676 case VSX_BUILTIN_XXSPLTD_V2DI:
15677 case VSX_BUILTIN_XXSPLTD_V2DF:
15678 {
15679 arg0 = gimple_call_arg (stmt, 0); /* input vector. */
15680 arg1 = gimple_call_arg (stmt, 1); /* index into arg0. */
15681 /* Only fold vec_splat () if arg1 is a constant value and is a valid
15682 index into the arg0 vector. */
15683 unsigned int n_elts = VECTOR_CST_NELTS (arg0);
15684 if (TREE_CODE (arg1) != INTEGER_CST
15685 || TREE_INT_CST_LOW (arg1) > (n_elts - 1))
15686 return false;
15687 lhs = gimple_call_lhs (stmt);
15688 tree lhs_type = TREE_TYPE (lhs);
15689 tree arg0_type = TREE_TYPE (arg0);
15690 tree splat;
15691 if (TREE_CODE (arg0) == VECTOR_CST)
15692 splat = VECTOR_CST_ELT (arg0, TREE_INT_CST_LOW (arg1));
15693 else
15694 {
15695 /* Determine (in bits) the length and start location of the
15696 splat value for a call to the tree_vec_extract helper. */
15697 int splat_elem_size = TREE_INT_CST_LOW (size_in_bytes (arg0_type))
15698 * BITS_PER_UNIT / n_elts;
15699 int splat_start_bit = TREE_INT_CST_LOW (arg1) * splat_elem_size;
15700 tree len = build_int_cst (bitsizetype, splat_elem_size);
15701 tree start = build_int_cst (bitsizetype, splat_start_bit);
15702 splat = tree_vec_extract (gsi, TREE_TYPE (lhs_type), arg0,
15703 len, start);
15704 }
15705 /* And finally, build the new vector. */
15706 tree splat_tree = build_vector_from_val (lhs_type, splat);
15707 g = gimple_build_assign (lhs, splat_tree);
15708 gimple_set_location (g, gimple_location (stmt));
15709 gsi_replace (gsi, g, true);
15710 return true;
15711 }
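/* Illustration, not in the original source: with a constant input the
   splat folds to a constant vector, e.g.

     vector signed short b = { 0, 1, 2, 3, 4, 5, 6, 7 };
     vector signed short a = vec_splat (b, 3);

   yields a = { 3, 3, 3, 3, 3, 3, 3, 3 }.  For a non-constant input,
   tree_vec_extract pulls the selected element out of arg0 and
   build_vector_from_val broadcasts it across the result.  */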
15712
15713 /* vec_mergel (integrals). */
15714 case ALTIVEC_BUILTIN_VMRGLH:
15715 case ALTIVEC_BUILTIN_VMRGLW:
15716 case VSX_BUILTIN_XXMRGLW_4SI:
15717 case ALTIVEC_BUILTIN_VMRGLB:
15718 case VSX_BUILTIN_VEC_MERGEL_V2DI:
15719 case VSX_BUILTIN_XXMRGLW_4SF:
15720 case VSX_BUILTIN_VEC_MERGEL_V2DF:
15721 fold_mergehl_helper (gsi, stmt, 1);
15722 return true;
15723 /* vec_mergeh (integrals). */
15724 case ALTIVEC_BUILTIN_VMRGHH:
15725 case ALTIVEC_BUILTIN_VMRGHW:
15726 case VSX_BUILTIN_XXMRGHW_4SI:
15727 case ALTIVEC_BUILTIN_VMRGHB:
15728 case VSX_BUILTIN_VEC_MERGEH_V2DI:
15729 case VSX_BUILTIN_XXMRGHW_4SF:
15730 case VSX_BUILTIN_VEC_MERGEH_V2DF:
15731 fold_mergehl_helper (gsi, stmt, 0);
15732 return true;
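/* Semantics sketch, illustrative, using big-endian element numbering:
   for V4SI inputs a = { a0, a1, a2, a3 } and b = { b0, b1, b2, b3 },

     vec_mergeh (a, b) yields { a0, b0, a1, b1 }
     vec_mergel (a, b) yields { a2, b2, a3, b3 }

   and fold_mergehl_helper emits the equivalent VEC_PERM_EXPR, with its
   final 0/1 argument selecting which half gets interleaved.  */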
15733
15734 /* Flavors of vec_mergee. */
15735 case P8V_BUILTIN_VMRGEW_V4SI:
15736 case P8V_BUILTIN_VMRGEW_V2DI:
15737 case P8V_BUILTIN_VMRGEW_V4SF:
15738 case P8V_BUILTIN_VMRGEW_V2DF:
15739 fold_mergeeo_helper (gsi, stmt, 0);
15740 return true;
15741 /* Flavors of vec_mergeo. */
15742 case P8V_BUILTIN_VMRGOW_V4SI:
15743 case P8V_BUILTIN_VMRGOW_V2DI:
15744 case P8V_BUILTIN_VMRGOW_V4SF:
15745 case P8V_BUILTIN_VMRGOW_V2DF:
15746 fold_mergeeo_helper (gsi, stmt, 1);
15747 return true;
15748
15749 /* d = vec_pack (a, b) */
15750 case P8V_BUILTIN_VPKUDUM:
15751 case ALTIVEC_BUILTIN_VPKUHUM:
15752 case ALTIVEC_BUILTIN_VPKUWUM:
15753 {
15754 arg0 = gimple_call_arg (stmt, 0);
15755 arg1 = gimple_call_arg (stmt, 1);
15756 lhs = gimple_call_lhs (stmt);
15757 gimple *g = gimple_build_assign (lhs, VEC_PACK_TRUNC_EXPR, arg0, arg1);
15758 gimple_set_location (g, gimple_location (stmt));
15759 gsi_replace (gsi, g, true);
15760 return true;
15761 }
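/* Illustration, added: VEC_PACK_TRUNC_EXPR narrows every element and
   concatenates the two inputs, so for

     vector unsigned long long a = { 1, 2 }, b = { 3, 4 };
     vector unsigned int r = vec_pack (a, b);

   r holds the low 32 bits of each of 1, 2, 3 and 4 (the element order
   follows the target's endianness conventions).  */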
15762
15763 /* d = vec_unpackh (a) */
15764 /* Note that the UNPACK_{HI,LO}_EXPR used in the gimple_build_assign call
15765 in this code is sensitive to endianness, and needs to be swapped to
15766 handle both LE and BE targets. */
15767 case ALTIVEC_BUILTIN_VUPKHSB:
15768 case ALTIVEC_BUILTIN_VUPKHSH:
15769 case P8V_BUILTIN_VUPKHSW:
15770 {
15771 arg0 = gimple_call_arg (stmt, 0);
15772 lhs = gimple_call_lhs (stmt);
15773 if (BYTES_BIG_ENDIAN)
15774 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
15775 else
15776 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
15777 gimple_set_location (g, gimple_location (stmt));
15778 gsi_replace (gsi, g, true);
15779 return true;
15780 }
15781 /* d = vec_unpackl (a) */
15782 case ALTIVEC_BUILTIN_VUPKLSB:
15783 case ALTIVEC_BUILTIN_VUPKLSH:
15784 case P8V_BUILTIN_VUPKLSW:
15785 {
15786 arg0 = gimple_call_arg (stmt, 0);
15787 lhs = gimple_call_lhs (stmt);
15788 if (BYTES_BIG_ENDIAN)
15789 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
15790 else
15791 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
15792 gimple_set_location (g, gimple_location (stmt));
15793 gsi_replace (gsi, g, true);
15794 return true;
15795 }
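/* Worked example of the endianness flip, illustrative: GIMPLE's
   VEC_UNPACK_HI_EXPR selects what big-endian AltiVec calls the high
   half, so on little-endian targets vec_unpackh must map to
   VEC_UNPACK_LO_EXPR and vice versa.  In both cases

     vector signed short v = { -1, 2, -3, 4, -5, 6, -7, 8 };
     vector signed int w = vec_unpackh (v);

   yields the first four elements, in AltiVec order, sign-extended to
   32 bits.  */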
15796 /* There is no gimple type corresponding to pixel, so just return. */
15797 case ALTIVEC_BUILTIN_VUPKHPX:
15798 case ALTIVEC_BUILTIN_VUPKLPX:
15799 return false;
15800
15801 /* vec_perm. */
15802 case ALTIVEC_BUILTIN_VPERM_16QI:
15803 case ALTIVEC_BUILTIN_VPERM_8HI:
15804 case ALTIVEC_BUILTIN_VPERM_4SI:
15805 case ALTIVEC_BUILTIN_VPERM_2DI:
15806 case ALTIVEC_BUILTIN_VPERM_4SF:
15807 case ALTIVEC_BUILTIN_VPERM_2DF:
15808 {
15809 arg0 = gimple_call_arg (stmt, 0);
15810 arg1 = gimple_call_arg (stmt, 1);
15811 tree permute = gimple_call_arg (stmt, 2);
15812 lhs = gimple_call_lhs (stmt);
15813 location_t loc = gimple_location (stmt);
15814 gimple_seq stmts = NULL;
15815 // Convert arg0 and arg1 to match the type of the permute
15816 // for the VEC_PERM_EXPR operation.
15817 tree permute_type = (TREE_TYPE (permute));
15818 tree arg0_ptype = gimple_convert (&stmts, loc, permute_type, arg0);
15819 tree arg1_ptype = gimple_convert (&stmts, loc, permute_type, arg1);
15820 tree lhs_ptype = gimple_build (&stmts, loc, VEC_PERM_EXPR,
15821 permute_type, arg0_ptype, arg1_ptype,
15822 permute);
15823 // Convert the result back to the desired lhs type upon completion.
15824 tree temp = gimple_convert (&stmts, loc, TREE_TYPE (lhs), lhs_ptype);
15825 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15826 g = gimple_build_assign (lhs, temp);
15827 gimple_set_location (g, loc);
15828 gsi_replace (gsi, g, true);
15829 return true;
15830 }
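/* Usage sketch, illustrative only, big-endian byte numbering: the
   permute control vector indexes the 32 bytes of the concatenated
   inputs, so

     vector unsigned char p = { 0, 16, 1, 17, 2, 18, 3, 19,
                                4, 20, 5, 21, 6, 22, 7, 23 };
     vector unsigned char r = vec_perm (a, b, p);

   interleaves the first eight bytes of a and b; the fold expresses
   this directly as a VEC_PERM_EXPR after converting a and b to the
   type of the permute vector.  */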
15831
15832 default:
15833 if (TARGET_DEBUG_BUILTIN)
15834 fprintf (stderr, "gimple builtin intrinsic not matched: %d %s %s\n",
15835 fn_code, fn_name1, fn_name2);
15836 break;
15837 }
15838
15839 return false;
15840 }
15841
15842 /* Expand an expression EXP that calls a built-in function,
15843 with result going to TARGET if that's convenient
15844 (and in mode MODE if that's convenient).
15845 SUBTARGET may be used as the target for computing one of EXP's operands.
15846 IGNORE is nonzero if the value is to be ignored. */
15847
15848 static rtx
15849 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15850 machine_mode mode ATTRIBUTE_UNUSED,
15851 int ignore ATTRIBUTE_UNUSED)
15852 {
15853 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15854 enum rs6000_builtins fcode
15855 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
15856 size_t uns_fcode = (size_t)fcode;
15857 const struct builtin_description *d;
15858 size_t i;
15859 rtx ret;
15860 bool success;
15861 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
15862 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
15863 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
15864
15865 /* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
15866 floating point type, depending on whether long double is the IBM extended
15867 double (KFmode) or long double is IEEE 128-bit (TFmode). It is simpler if
15868 we only define one variant of the built-in function, and switch the code
15869 when defining it, rather than defining two built-ins and using the
15870 overload table in rs6000-c.c to switch between the two. If we don't have
15871 the proper assembler, don't do this switch because CODE_FOR_*kf* and
15872 CODE_FOR_*tf* will be CODE_FOR_nothing. */
15873 if (FLOAT128_IEEE_P (TFmode))
15874 switch (icode)
15875 {
15876 default:
15877 break;
15878
15879 case CODE_FOR_sqrtkf2_odd: icode = CODE_FOR_sqrttf2_odd; break;
15880 case CODE_FOR_trunckfdf2_odd: icode = CODE_FOR_trunctfdf2_odd; break;
15881 case CODE_FOR_addkf3_odd: icode = CODE_FOR_addtf3_odd; break;
15882 case CODE_FOR_subkf3_odd: icode = CODE_FOR_subtf3_odd; break;
15883 case CODE_FOR_mulkf3_odd: icode = CODE_FOR_multf3_odd; break;
15884 case CODE_FOR_divkf3_odd: icode = CODE_FOR_divtf3_odd; break;
15885 case CODE_FOR_fmakf4_odd: icode = CODE_FOR_fmatf4_odd; break;
15886 case CODE_FOR_xsxexpqp_kf: icode = CODE_FOR_xsxexpqp_tf; break;
15887 case CODE_FOR_xsxsigqp_kf: icode = CODE_FOR_xsxsigqp_tf; break;
15888 case CODE_FOR_xststdcnegqp_kf: icode = CODE_FOR_xststdcnegqp_tf; break;
15889 case CODE_FOR_xsiexpqp_kf: icode = CODE_FOR_xsiexpqp_tf; break;
15890 case CODE_FOR_xsiexpqpf_kf: icode = CODE_FOR_xsiexpqpf_tf; break;
15891 case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
15892 }
15893
15894 if (TARGET_DEBUG_BUILTIN)
15895 {
15896 const char *name1 = rs6000_builtin_info[uns_fcode].name;
15897 const char *name2 = (icode != CODE_FOR_nothing)
15898 ? get_insn_name ((int) icode)
15899 : "nothing";
15900 const char *name3;
15901
15902 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
15903 {
15904 default: name3 = "unknown"; break;
15905 case RS6000_BTC_SPECIAL: name3 = "special"; break;
15906 case RS6000_BTC_UNARY: name3 = "unary"; break;
15907 case RS6000_BTC_BINARY: name3 = "binary"; break;
15908 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
15909 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
15910 case RS6000_BTC_ABS: name3 = "abs"; break;
15911 case RS6000_BTC_DST: name3 = "dst"; break;
15912 }
15913
15915 fprintf (stderr,
15916 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
15917 (name1) ? name1 : "---", fcode,
15918 (name2) ? name2 : "---", (int) icode,
15919 name3,
15920 func_valid_p ? "" : ", not valid");
15921 }
15922
15923 if (!func_valid_p)
15924 {
15925 rs6000_invalid_builtin (fcode);
15926
15927 /* Given it is invalid, just generate a normal call. */
15928 return expand_call (exp, target, ignore);
15929 }
15930
15931 switch (fcode)
15932 {
15933 case RS6000_BUILTIN_RECIP:
15934 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
15935
15936 case RS6000_BUILTIN_RECIPF:
15937 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
15938
15939 case RS6000_BUILTIN_RSQRTF:
15940 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
15941
15942 case RS6000_BUILTIN_RSQRT:
15943 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
15944
15945 case POWER7_BUILTIN_BPERMD:
15946 return rs6000_expand_binop_builtin (((TARGET_64BIT)
15947 ? CODE_FOR_bpermd_di
15948 : CODE_FOR_bpermd_si), exp, target);
15949
15950 case RS6000_BUILTIN_GET_TB:
15951 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
15952 target);
15953
15954 case RS6000_BUILTIN_MFTB:
15955 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
15956 ? CODE_FOR_rs6000_mftb_di
15957 : CODE_FOR_rs6000_mftb_si),
15958 target);
15959
15960 case RS6000_BUILTIN_MFFS:
15961 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
15962
15963 case RS6000_BUILTIN_MTFSB0:
15964 return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
15965
15966 case RS6000_BUILTIN_MTFSB1:
15967 return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
15968
15969 case RS6000_BUILTIN_SET_FPSCR_RN:
15970 return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
15971 exp);
15972
15973 case RS6000_BUILTIN_SET_FPSCR_DRN:
15974 return
15975 rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
15976 exp);
15977
15978 case RS6000_BUILTIN_MFFSL:
15979 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
15980
15981 case RS6000_BUILTIN_MTFSF:
15982 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
15983
15984 case RS6000_BUILTIN_CPU_INIT:
15985 case RS6000_BUILTIN_CPU_IS:
15986 case RS6000_BUILTIN_CPU_SUPPORTS:
15987 return cpu_expand_builtin (fcode, exp, target);
15988
15989 case MISC_BUILTIN_SPEC_BARRIER:
15990 {
15991 emit_insn (gen_speculation_barrier ());
15992 return NULL_RTX;
15993 }
15994
15995 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
15996 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
15997 {
15998 int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
15999 : (int) CODE_FOR_altivec_lvsl_direct);
16000 machine_mode tmode = insn_data[icode2].operand[0].mode;
16001 machine_mode mode = insn_data[icode2].operand[1].mode;
16002 tree arg;
16003 rtx op, addr, pat;
16004
16005 gcc_assert (TARGET_ALTIVEC);
16006
16007 arg = CALL_EXPR_ARG (exp, 0);
16008 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16009 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16010 addr = memory_address (mode, op);
16011 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16012 op = addr;
16013 else
16014 {
16015 /* For the load case we need to negate the address. */
16016 op = gen_reg_rtx (GET_MODE (addr));
16017 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16018 }
16019 op = gen_rtx_MEM (mode, op);
16020
16021 if (target == 0
16022 || GET_MODE (target) != tmode
16023 || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
16024 target = gen_reg_rtx (tmode);
16025
16026 pat = GEN_FCN (icode2) (target, op);
16027 if (!pat)
16028 return 0;
16029 emit_insn (pat);
16030
16031 return target;
16032 }
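/* Illustrative note, not in the original: the generated insn is a
   plain lvsl/lvsr whose result depends only on addr & 0xf.  For
   ALTIVEC_BUILTIN_MASK_FOR_LOAD the address is negated first:

     op = -addr;
     mask = lvsr (op);  (on big-endian; lvsl on little-endian)

   and the vectorizer then feeds the mask to a vperm that realigns the
   loaded data.  */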
16033
16034 case ALTIVEC_BUILTIN_VCFUX:
16035 case ALTIVEC_BUILTIN_VCFSX:
16036 case ALTIVEC_BUILTIN_VCTUXS:
16037 case ALTIVEC_BUILTIN_VCTSXS:
16038 /* FIXME: There's got to be a nicer way to handle this case than
16039 constructing a new CALL_EXPR. */
16040 if (call_expr_nargs (exp) == 1)
16041 {
16042 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16043 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16044 }
16045 break;
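/* Sketch, illustrative: vcfsx and friends take a literal scale factor
   as their second operand, so a one-argument call is rewritten above
   as if the user had passed a scale of zero, e.g.

     __builtin_altivec_vcfsx (v)  becomes  __builtin_altivec_vcfsx (v, 0)

   i.e. a scaling by 2**0 == 1, before falling through to the normal
   expansion path.  */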
16046
16047 /* For the pack and unpack int128 routines, fix up the builtin so it
16048 uses the correct IBM128 type. */
16049 case MISC_BUILTIN_PACK_IF:
16050 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16051 {
16052 icode = CODE_FOR_packtf;
16053 fcode = MISC_BUILTIN_PACK_TF;
16054 uns_fcode = (size_t)fcode;
16055 }
16056 break;
16057
16058 case MISC_BUILTIN_UNPACK_IF:
16059 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16060 {
16061 icode = CODE_FOR_unpacktf;
16062 fcode = MISC_BUILTIN_UNPACK_TF;
16063 uns_fcode = (size_t)fcode;
16064 }
16065 break;
16066
16067 default:
16068 break;
16069 }
16070
16071 if (TARGET_ALTIVEC)
16072 {
16073 ret = altivec_expand_builtin (exp, target, &success);
16074
16075 if (success)
16076 return ret;
16077 }
16078 if (TARGET_HTM)
16079 {
16080 ret = htm_expand_builtin (exp, target, &success);
16081
16082 if (success)
16083 return ret;
16084 }
16085
16086 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16087 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16088 gcc_assert (attr == RS6000_BTC_UNARY
16089 || attr == RS6000_BTC_BINARY
16090 || attr == RS6000_BTC_TERNARY
16091 || attr == RS6000_BTC_SPECIAL);
16092
16093 /* Handle simple unary operations. */
16094 d = bdesc_1arg;
16095 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16096 if (d->code == fcode)
16097 return rs6000_expand_unop_builtin (icode, exp, target);
16098
16099 /* Handle simple binary operations. */
16100 d = bdesc_2arg;
16101 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16102 if (d->code == fcode)
16103 return rs6000_expand_binop_builtin (icode, exp, target);
16104
16105 /* Handle simple ternary operations. */
16106 d = bdesc_3arg;
16107 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16108 if (d->code == fcode)
16109 return rs6000_expand_ternop_builtin (icode, exp, target);
16110
16111 /* Handle simple no-argument operations. */
16112 d = bdesc_0arg;
16113 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16114 if (d->code == fcode)
16115 return rs6000_expand_zeroop_builtin (icode, target);
16116
16117 gcc_unreachable ();
16118 }
16119
16120 /* Create a builtin vector type with a name, taking care not to give
16121 the canonical type a name. */
16122
16123 static tree
16124 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
16125 {
16126 tree result = build_vector_type (elt_type, num_elts);
16127
16128 /* Copy so we don't give the canonical type a name. */
16129 result = build_variant_type_copy (result);
16130
16131 add_builtin_type (name, result);
16132
16133 return result;
16134 }
16135
16136 static void
16137 rs6000_init_builtins (void)
16138 {
16139 tree tdecl;
16140 tree ftype;
16141 machine_mode mode;
16142
16143 if (TARGET_DEBUG_BUILTIN)
16144 fprintf (stderr, "rs6000_init_builtins%s%s\n",
16145 (TARGET_ALTIVEC) ? ", altivec" : "",
16146 (TARGET_VSX) ? ", vsx" : "");
16147
16148 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
16149 : "__vector long long",
16150 intDI_type_node, 2);
16151 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
16152 V4SI_type_node = rs6000_vector_type ("__vector signed int",
16153 intSI_type_node, 4);
16154 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
16155 V8HI_type_node = rs6000_vector_type ("__vector signed short",
16156 intHI_type_node, 8);
16157 V16QI_type_node = rs6000_vector_type ("__vector signed char",
16158 intQI_type_node, 16);
16159
16160 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
16161 unsigned_intQI_type_node, 16);
16162 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
16163 unsigned_intHI_type_node, 8);
16164 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
16165 unsigned_intSI_type_node, 4);
16166 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16167 ? "__vector unsigned long"
16168 : "__vector unsigned long long",
16169 unsigned_intDI_type_node, 2);
16170
16171 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16172
16173 const_str_type_node
16174 = build_pointer_type (build_qualified_type (char_type_node,
16175 TYPE_QUAL_CONST));
16176
16177 /* We use V1TI mode as a special container to hold __int128_t items that
16178 must live in VSX registers. */
16179 if (intTI_type_node)
16180 {
16181 V1TI_type_node = rs6000_vector_type ("__vector __int128",
16182 intTI_type_node, 1);
16183 unsigned_V1TI_type_node
16184 = rs6000_vector_type ("__vector unsigned __int128",
16185 unsigned_intTI_type_node, 1);
16186 }
16187
16188 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16189 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16190 'vector unsigned short'. */
16191
16192 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16193 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16194 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16195 bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16196 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16197
16198 long_integer_type_internal_node = long_integer_type_node;
16199 long_unsigned_type_internal_node = long_unsigned_type_node;
16200 long_long_integer_type_internal_node = long_long_integer_type_node;
16201 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16202 intQI_type_internal_node = intQI_type_node;
16203 uintQI_type_internal_node = unsigned_intQI_type_node;
16204 intHI_type_internal_node = intHI_type_node;
16205 uintHI_type_internal_node = unsigned_intHI_type_node;
16206 intSI_type_internal_node = intSI_type_node;
16207 uintSI_type_internal_node = unsigned_intSI_type_node;
16208 intDI_type_internal_node = intDI_type_node;
16209 uintDI_type_internal_node = unsigned_intDI_type_node;
16210 intTI_type_internal_node = intTI_type_node;
16211 uintTI_type_internal_node = unsigned_intTI_type_node;
16212 float_type_internal_node = float_type_node;
16213 double_type_internal_node = double_type_node;
16214 long_double_type_internal_node = long_double_type_node;
16215 dfloat64_type_internal_node = dfloat64_type_node;
16216 dfloat128_type_internal_node = dfloat128_type_node;
16217 void_type_internal_node = void_type_node;
16218
16219 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16220 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16221 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16222 format that uses a pair of doubles, depending on the switches and
16223 defaults.
16224
16225 If we don't support either 128-bit IBM double double or IEEE 128-bit
16226 floating point, we need to make sure the type is non-zero or else the
16227 self-test fails during bootstrap.
16228
16229 Always create __ibm128 as a separate type, even if the current long double
16230 format is IBM extended double.
16231
16232 For IEEE 128-bit floating point, always create the type __ieee128. If the
16233 user used -mfloat128, rs6000-c.c will create a define from __float128 to
16234 __ieee128. */
16235 if (TARGET_FLOAT128_TYPE)
16236 {
16237 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16238 ibm128_float_type_node = long_double_type_node;
16239 else
16240 {
16241 ibm128_float_type_node = make_node (REAL_TYPE);
16242 TYPE_PRECISION (ibm128_float_type_node) = 128;
16243 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16244 layout_type (ibm128_float_type_node);
16245 }
16246
16247 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16248 "__ibm128");
16249
16250 if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16251 ieee128_float_type_node = long_double_type_node;
16252 else
16253 ieee128_float_type_node = float128_type_node;
16254
16255 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16256 "__ieee128");
16257 }
16258
16259 else
16260 ieee128_float_type_node = ibm128_float_type_node = long_double_type_node;
16261
16262 /* Initialize the modes for builtin_function_type, mapping a machine
16263 mode to its tree type node. */
16264 builtin_mode_to_type[QImode][0] = integer_type_node;
16265 builtin_mode_to_type[HImode][0] = integer_type_node;
16266 builtin_mode_to_type[SImode][0] = intSI_type_node;
16267 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16268 builtin_mode_to_type[DImode][0] = intDI_type_node;
16269 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16270 builtin_mode_to_type[TImode][0] = intTI_type_node;
16271 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16272 builtin_mode_to_type[SFmode][0] = float_type_node;
16273 builtin_mode_to_type[DFmode][0] = double_type_node;
16274 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16275 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16276 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16277 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16278 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16279 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16280 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16281 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16282 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16283 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16284 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16285 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16286 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16287 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16288 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16289 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16290 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
16291
16292 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16293 TYPE_NAME (bool_char_type_node) = tdecl;
16294
16295 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16296 TYPE_NAME (bool_short_type_node) = tdecl;
16297
16298 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16299 TYPE_NAME (bool_int_type_node) = tdecl;
16300
16301 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16302 TYPE_NAME (pixel_type_node) = tdecl;
16303
16304 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
16305 bool_char_type_node, 16);
16306 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
16307 bool_short_type_node, 8);
16308 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
16309 bool_int_type_node, 4);
16310 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16311 ? "__vector __bool long"
16312 : "__vector __bool long long",
16313 bool_long_long_type_node, 2);
16314 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
16315 pixel_type_node, 8);
16316
16317 /* Create Altivec and VSX builtins on machines with at least the
16318 general purpose extensions (970 and newer) to allow the use of
16319 the target attribute. */
16320 if (TARGET_EXTRA_BUILTINS)
16321 altivec_init_builtins ();
16322 if (TARGET_HTM)
16323 htm_init_builtins ();
16324
16325 if (TARGET_EXTRA_BUILTINS)
16326 rs6000_common_init_builtins ();
16327
16328 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16329 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16330 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16331
16332 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16333 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16334 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16335
16336 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16337 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16338 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16339
16340 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16341 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16342 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16343
16344 mode = (TARGET_64BIT) ? DImode : SImode;
16345 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16346 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16347 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16348
16349 ftype = build_function_type_list (unsigned_intDI_type_node,
16350 NULL_TREE);
16351 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16352
16353 if (TARGET_64BIT)
16354 ftype = build_function_type_list (unsigned_intDI_type_node,
16355 NULL_TREE);
16356 else
16357 ftype = build_function_type_list (unsigned_intSI_type_node,
16358 NULL_TREE);
16359 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
16360
16361 ftype = build_function_type_list (double_type_node, NULL_TREE);
16362 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16363
16364 ftype = build_function_type_list (double_type_node, NULL_TREE);
16365 def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL);
16366
16367 ftype = build_function_type_list (void_type_node,
16368 intSI_type_node,
16369 NULL_TREE);
16370 def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0);
16371
16372 ftype = build_function_type_list (void_type_node,
16373 intSI_type_node,
16374 NULL_TREE);
16375 def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1);
16376
16377 ftype = build_function_type_list (void_type_node,
16378 intDI_type_node,
16379 NULL_TREE);
16380 def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN);
16381
16382 ftype = build_function_type_list (void_type_node,
16383 intDI_type_node,
16384 NULL_TREE);
16385 def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN);
16386
16387 ftype = build_function_type_list (void_type_node,
16388 intSI_type_node, double_type_node,
16389 NULL_TREE);
16390 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16391
16392 ftype = build_function_type_list (void_type_node, NULL_TREE);
16393 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16394 def_builtin ("__builtin_ppc_speculation_barrier", ftype,
16395 MISC_BUILTIN_SPEC_BARRIER);
16396
16397 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16398 NULL_TREE);
16399 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16400 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
16401
16402 /* AIX libm provides clog as __clog. */
16403 if (TARGET_XCOFF
16404 && (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16405 set_user_assembler_name (tdecl, "__clog");
16406
16407 #ifdef SUBTARGET_INIT_BUILTINS
16408 SUBTARGET_INIT_BUILTINS;
16409 #endif
16410 }
16411
16412 /* Returns the rs6000 builtin decl for CODE. */
16413
16414 static tree
16415 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16416 {
16417 HOST_WIDE_INT fnmask;
16418
16419 if (code >= RS6000_BUILTIN_COUNT)
16420 return error_mark_node;
16421
16422 fnmask = rs6000_builtin_info[code].mask;
16423 if ((fnmask & rs6000_builtin_mask) != fnmask)
16424 {
16425 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16426 return error_mark_node;
16427 }
16428
16429 return rs6000_builtin_decls[code];
16430 }
16431
16432 static void
16433 altivec_init_builtins (void)
16434 {
16435 const struct builtin_description *d;
16436 size_t i;
16437 tree ftype;
16438 tree decl;
16439 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16440
16441 tree pvoid_type_node = build_pointer_type (void_type_node);
16442
16443 tree pcvoid_type_node
16444 = build_pointer_type (build_qualified_type (void_type_node,
16445 TYPE_QUAL_CONST));
16446
16447 tree int_ftype_opaque
16448 = build_function_type_list (integer_type_node,
16449 opaque_V4SI_type_node, NULL_TREE);
16450 tree opaque_ftype_opaque
16451 = build_function_type_list (integer_type_node, NULL_TREE);
16452 tree opaque_ftype_opaque_int
16453 = build_function_type_list (opaque_V4SI_type_node,
16454 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
16455 tree opaque_ftype_opaque_opaque_int
16456 = build_function_type_list (opaque_V4SI_type_node,
16457 opaque_V4SI_type_node, opaque_V4SI_type_node,
16458 integer_type_node, NULL_TREE);
16459 tree opaque_ftype_opaque_opaque_opaque
16460 = build_function_type_list (opaque_V4SI_type_node,
16461 opaque_V4SI_type_node, opaque_V4SI_type_node,
16462 opaque_V4SI_type_node, NULL_TREE);
16463 tree opaque_ftype_opaque_opaque
16464 = build_function_type_list (opaque_V4SI_type_node,
16465 opaque_V4SI_type_node, opaque_V4SI_type_node,
16466 NULL_TREE);
16467 tree int_ftype_int_opaque_opaque
16468 = build_function_type_list (integer_type_node,
16469 integer_type_node, opaque_V4SI_type_node,
16470 opaque_V4SI_type_node, NULL_TREE);
16471 tree int_ftype_int_v4si_v4si
16472 = build_function_type_list (integer_type_node,
16473 integer_type_node, V4SI_type_node,
16474 V4SI_type_node, NULL_TREE);
16475 tree int_ftype_int_v2di_v2di
16476 = build_function_type_list (integer_type_node,
16477 integer_type_node, V2DI_type_node,
16478 V2DI_type_node, NULL_TREE);
16479 tree void_ftype_v4si
16480 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
16481 tree v8hi_ftype_void
16482 = build_function_type_list (V8HI_type_node, NULL_TREE);
16483 tree void_ftype_void
16484 = build_function_type_list (void_type_node, NULL_TREE);
16485 tree void_ftype_int
16486 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16487
16488 tree opaque_ftype_long_pcvoid
16489 = build_function_type_list (opaque_V4SI_type_node,
16490 long_integer_type_node, pcvoid_type_node,
16491 NULL_TREE);
16492 tree v16qi_ftype_long_pcvoid
16493 = build_function_type_list (V16QI_type_node,
16494 long_integer_type_node, pcvoid_type_node,
16495 NULL_TREE);
16496 tree v8hi_ftype_long_pcvoid
16497 = build_function_type_list (V8HI_type_node,
16498 long_integer_type_node, pcvoid_type_node,
16499 NULL_TREE);
16500 tree v4si_ftype_long_pcvoid
16501 = build_function_type_list (V4SI_type_node,
16502 long_integer_type_node, pcvoid_type_node,
16503 NULL_TREE);
16504 tree v4sf_ftype_long_pcvoid
16505 = build_function_type_list (V4SF_type_node,
16506 long_integer_type_node, pcvoid_type_node,
16507 NULL_TREE);
16508 tree v2df_ftype_long_pcvoid
16509 = build_function_type_list (V2DF_type_node,
16510 long_integer_type_node, pcvoid_type_node,
16511 NULL_TREE);
16512 tree v2di_ftype_long_pcvoid
16513 = build_function_type_list (V2DI_type_node,
16514 long_integer_type_node, pcvoid_type_node,
16515 NULL_TREE);
16516 tree v1ti_ftype_long_pcvoid
16517 = build_function_type_list (V1TI_type_node,
16518 long_integer_type_node, pcvoid_type_node,
16519 NULL_TREE);
16520
16521 tree void_ftype_opaque_long_pvoid
16522 = build_function_type_list (void_type_node,
16523 opaque_V4SI_type_node, long_integer_type_node,
16524 pvoid_type_node, NULL_TREE);
16525 tree void_ftype_v4si_long_pvoid
16526 = build_function_type_list (void_type_node,
16527 V4SI_type_node, long_integer_type_node,
16528 pvoid_type_node, NULL_TREE);
16529 tree void_ftype_v16qi_long_pvoid
16530 = build_function_type_list (void_type_node,
16531 V16QI_type_node, long_integer_type_node,
16532 pvoid_type_node, NULL_TREE);
16533
16534 tree void_ftype_v16qi_pvoid_long
16535 = build_function_type_list (void_type_node,
16536 V16QI_type_node, pvoid_type_node,
16537 long_integer_type_node, NULL_TREE);
16538
16539 tree void_ftype_v8hi_long_pvoid
16540 = build_function_type_list (void_type_node,
16541 V8HI_type_node, long_integer_type_node,
16542 pvoid_type_node, NULL_TREE);
16543 tree void_ftype_v4sf_long_pvoid
16544 = build_function_type_list (void_type_node,
16545 V4SF_type_node, long_integer_type_node,
16546 pvoid_type_node, NULL_TREE);
16547 tree void_ftype_v2df_long_pvoid
16548 = build_function_type_list (void_type_node,
16549 V2DF_type_node, long_integer_type_node,
16550 pvoid_type_node, NULL_TREE);
16551 tree void_ftype_v1ti_long_pvoid
16552 = build_function_type_list (void_type_node,
16553 V1TI_type_node, long_integer_type_node,
16554 pvoid_type_node, NULL_TREE);
16555 tree void_ftype_v2di_long_pvoid
16556 = build_function_type_list (void_type_node,
16557 V2DI_type_node, long_integer_type_node,
16558 pvoid_type_node, NULL_TREE);
16559 tree int_ftype_int_v8hi_v8hi
16560 = build_function_type_list (integer_type_node,
16561 integer_type_node, V8HI_type_node,
16562 V8HI_type_node, NULL_TREE);
16563 tree int_ftype_int_v16qi_v16qi
16564 = build_function_type_list (integer_type_node,
16565 integer_type_node, V16QI_type_node,
16566 V16QI_type_node, NULL_TREE);
16567 tree int_ftype_int_v4sf_v4sf
16568 = build_function_type_list (integer_type_node,
16569 integer_type_node, V4SF_type_node,
16570 V4SF_type_node, NULL_TREE);
16571 tree int_ftype_int_v2df_v2df
16572 = build_function_type_list (integer_type_node,
16573 integer_type_node, V2DF_type_node,
16574 V2DF_type_node, NULL_TREE);
16575 tree v2di_ftype_v2di
16576 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
16577 tree v4si_ftype_v4si
16578 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16579 tree v8hi_ftype_v8hi
16580 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16581 tree v16qi_ftype_v16qi
16582 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16583 tree v4sf_ftype_v4sf
16584 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16585 tree v2df_ftype_v2df
16586 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16587 tree void_ftype_pcvoid_int_int
16588 = build_function_type_list (void_type_node,
16589 pcvoid_type_node, integer_type_node,
16590 integer_type_node, NULL_TREE);
16591
16592 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
16593 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
16594 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
16595 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
16596 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
16597 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
16598 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
16599 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
16600 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
16601 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
16602 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
16603 ALTIVEC_BUILTIN_LVXL_V2DF);
16604 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
16605 ALTIVEC_BUILTIN_LVXL_V2DI);
16606 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
16607 ALTIVEC_BUILTIN_LVXL_V4SF);
16608 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
16609 ALTIVEC_BUILTIN_LVXL_V4SI);
16610 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
16611 ALTIVEC_BUILTIN_LVXL_V8HI);
16612 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
16613 ALTIVEC_BUILTIN_LVXL_V16QI);
16614 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
16615 def_builtin ("__builtin_altivec_lvx_v1ti", v1ti_ftype_long_pcvoid,
16616 ALTIVEC_BUILTIN_LVX_V1TI);
16617 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
16618 ALTIVEC_BUILTIN_LVX_V2DF);
16619 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
16620 ALTIVEC_BUILTIN_LVX_V2DI);
16621 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
16622 ALTIVEC_BUILTIN_LVX_V4SF);
16623 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
16624 ALTIVEC_BUILTIN_LVX_V4SI);
16625 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
16626 ALTIVEC_BUILTIN_LVX_V8HI);
16627 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
16628 ALTIVEC_BUILTIN_LVX_V16QI);
16629 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
16630 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
16631 ALTIVEC_BUILTIN_STVX_V2DF);
16632 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
16633 ALTIVEC_BUILTIN_STVX_V2DI);
16634 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
16635 ALTIVEC_BUILTIN_STVX_V4SF);
16636 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
16637 ALTIVEC_BUILTIN_STVX_V4SI);
16638 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
16639 ALTIVEC_BUILTIN_STVX_V8HI);
16640 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
16641 ALTIVEC_BUILTIN_STVX_V16QI);
16642 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
16643 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
16644 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
16645 ALTIVEC_BUILTIN_STVXL_V2DF);
16646 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
16647 ALTIVEC_BUILTIN_STVXL_V2DI);
16648 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
16649 ALTIVEC_BUILTIN_STVXL_V4SF);
16650 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
16651 ALTIVEC_BUILTIN_STVXL_V4SI);
16652 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
16653 ALTIVEC_BUILTIN_STVXL_V8HI);
16654 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
16655 ALTIVEC_BUILTIN_STVXL_V16QI);
16656 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
16657 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
16658 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
16659 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
16660 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
16661 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
16662 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
16663 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
16664 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
16665 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
16666 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
16667 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
16668 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
16669 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
16670 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
16671 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
16672
16673 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
16674 VSX_BUILTIN_LXVD2X_V2DF);
16675 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
16676 VSX_BUILTIN_LXVD2X_V2DI);
16677 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
16678 VSX_BUILTIN_LXVW4X_V4SF);
16679 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
16680 VSX_BUILTIN_LXVW4X_V4SI);
16681 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
16682 VSX_BUILTIN_LXVW4X_V8HI);
16683 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
16684 VSX_BUILTIN_LXVW4X_V16QI);
16685 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
16686 VSX_BUILTIN_STXVD2X_V2DF);
16687 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
16688 VSX_BUILTIN_STXVD2X_V2DI);
16689 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
16690 VSX_BUILTIN_STXVW4X_V4SF);
16691 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
16692 VSX_BUILTIN_STXVW4X_V4SI);
16693 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
16694 VSX_BUILTIN_STXVW4X_V8HI);
16695 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
16696 VSX_BUILTIN_STXVW4X_V16QI);
16697
16698 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
16699 VSX_BUILTIN_LD_ELEMREV_V2DF);
16700 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
16701 VSX_BUILTIN_LD_ELEMREV_V2DI);
16702 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
16703 VSX_BUILTIN_LD_ELEMREV_V4SF);
16704 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
16705 VSX_BUILTIN_LD_ELEMREV_V4SI);
16706 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
16707 VSX_BUILTIN_LD_ELEMREV_V8HI);
16708 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
16709 VSX_BUILTIN_LD_ELEMREV_V16QI);
16710 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
16711 VSX_BUILTIN_ST_ELEMREV_V2DF);
16712 def_builtin ("__builtin_vsx_st_elemrev_v1ti", void_ftype_v1ti_long_pvoid,
16713 VSX_BUILTIN_ST_ELEMREV_V1TI);
16714 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
16715 VSX_BUILTIN_ST_ELEMREV_V2DI);
16716 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
16717 VSX_BUILTIN_ST_ELEMREV_V4SF);
16718 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
16719 VSX_BUILTIN_ST_ELEMREV_V4SI);
16720 def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid,
16721 VSX_BUILTIN_ST_ELEMREV_V8HI);
16722 def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid,
16723 VSX_BUILTIN_ST_ELEMREV_V16QI);
16724
16725 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
16726 VSX_BUILTIN_VEC_LD);
16727 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
16728 VSX_BUILTIN_VEC_ST);
16729 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
16730 VSX_BUILTIN_VEC_XL);
16731 def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
16732 VSX_BUILTIN_VEC_XL_BE);
16733 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
16734 VSX_BUILTIN_VEC_XST);
16735 def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid,
16736 VSX_BUILTIN_VEC_XST_BE);
16737
16738 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
16739 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
16740 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
16741
16742 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
16743 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
16744 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
16745 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
16746 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
16747 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
16748 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
16749 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
16750 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
16751 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
16752 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
16753 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
16754
16755 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
16756 ALTIVEC_BUILTIN_VEC_ADDE);
16757 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
16758 ALTIVEC_BUILTIN_VEC_ADDEC);
16759 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
16760 ALTIVEC_BUILTIN_VEC_CMPNE);
16761 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
16762 ALTIVEC_BUILTIN_VEC_MUL);
16763 def_builtin ("__builtin_vec_sube", opaque_ftype_opaque_opaque_opaque,
16764 ALTIVEC_BUILTIN_VEC_SUBE);
16765 def_builtin ("__builtin_vec_subec", opaque_ftype_opaque_opaque_opaque,
16766 ALTIVEC_BUILTIN_VEC_SUBEC);
16767
16768 /* Cell builtins. */
16769 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
16770 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
16771 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
16772 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
16773
16774 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
16775 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
16776 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
16777 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
16778
16779 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
16780 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
16781 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
16782 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
16783
16784 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
16785 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
16786 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
16787 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
16788
16789 if (TARGET_P9_VECTOR)
16790 {
16791 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
16792 P9V_BUILTIN_STXVL);
16793 def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
16794 P9V_BUILTIN_XST_LEN_R);
16795 }
16796
16797 /* Add the DST variants. */
16798 d = bdesc_dst;
16799 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16800 {
16801 HOST_WIDE_INT mask = d->mask;
16802
16803 /* It is expected that these dst built-in functions may have
16804 d->icode equal to CODE_FOR_nothing. */
16805 if ((mask & builtin_mask) != mask)
16806 {
16807 if (TARGET_DEBUG_BUILTIN)
16808 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
16809 d->name);
16810 continue;
16811 }
16812 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
16813 }
16814
16815 /* Initialize the predicates. */
16816 d = bdesc_altivec_preds;
16817 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16818 {
16819 machine_mode mode1;
16820 tree type;
16821 HOST_WIDE_INT mask = d->mask;
16822
16823 if ((mask & builtin_mask) != mask)
16824 {
16825 if (TARGET_DEBUG_BUILTIN)
16826 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
16827 d->name);
16828 continue;
16829 }
16830
16831 if (rs6000_overloaded_builtin_p (d->code))
16832 mode1 = VOIDmode;
16833 else
16834 {
16835 /* Cannot define builtin if the instruction is disabled. */
16836 gcc_assert (d->icode != CODE_FOR_nothing);
16837 mode1 = insn_data[d->icode].operand[1].mode;
16838 }
16839
16840 switch (mode1)
16841 {
16842 case E_VOIDmode:
16843 type = int_ftype_int_opaque_opaque;
16844 break;
16845 case E_V2DImode:
16846 type = int_ftype_int_v2di_v2di;
16847 break;
16848 case E_V4SImode:
16849 type = int_ftype_int_v4si_v4si;
16850 break;
16851 case E_V8HImode:
16852 type = int_ftype_int_v8hi_v8hi;
16853 break;
16854 case E_V16QImode:
16855 type = int_ftype_int_v16qi_v16qi;
16856 break;
16857 case E_V4SFmode:
16858 type = int_ftype_int_v4sf_v4sf;
16859 break;
16860 case E_V2DFmode:
16861 type = int_ftype_int_v2df_v2df;
16862 break;
16863 default:
16864 gcc_unreachable ();
16865 }
16866
16867 def_builtin (d->name, type, d->code);
16868 }
16869
16870 /* Initialize the abs* operators. */
16871 d = bdesc_abs;
16872 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16873 {
16874 machine_mode mode0;
16875 tree type;
16876 HOST_WIDE_INT mask = d->mask;
16877
16878 if ((mask & builtin_mask) != mask)
16879 {
16880 if (TARGET_DEBUG_BUILTIN)
16881 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
16882 d->name);
16883 continue;
16884 }
16885
16886 /* Cannot define builtin if the instruction is disabled. */
16887 gcc_assert (d->icode != CODE_FOR_nothing);
16888 mode0 = insn_data[d->icode].operand[0].mode;
16889
16890 switch (mode0)
16891 {
16892 case E_V2DImode:
16893 type = v2di_ftype_v2di;
16894 break;
16895 case E_V4SImode:
16896 type = v4si_ftype_v4si;
16897 break;
16898 case E_V8HImode:
16899 type = v8hi_ftype_v8hi;
16900 break;
16901 case E_V16QImode:
16902 type = v16qi_ftype_v16qi;
16903 break;
16904 case E_V4SFmode:
16905 type = v4sf_ftype_v4sf;
16906 break;
16907 case E_V2DFmode:
16908 type = v2df_ftype_v2df;
16909 break;
16910 default:
16911 gcc_unreachable ();
16912 }
16913
16914 def_builtin (d->name, type, d->code);
16915 }
16916
16917 /* Initialize target builtin that implements
16918 targetm.vectorize.builtin_mask_for_load. */
16919
16920 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
16921 v16qi_ftype_long_pcvoid,
16922 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
16923 BUILT_IN_MD, NULL, NULL_TREE);
16924 TREE_READONLY (decl) = 1;
16925 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
16926 altivec_builtin_mask_for_load = decl;
16927
16928 /* Access to the vec_init patterns. */
16929 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
16930 integer_type_node, integer_type_node,
16931 integer_type_node, NULL_TREE);
16932 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
16933
16934 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
16935 short_integer_type_node,
16936 short_integer_type_node,
16937 short_integer_type_node,
16938 short_integer_type_node,
16939 short_integer_type_node,
16940 short_integer_type_node,
16941 short_integer_type_node, NULL_TREE);
16942 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
16943
16944 ftype = build_function_type_list (V16QI_type_node, char_type_node,
16945 char_type_node, char_type_node,
16946 char_type_node, char_type_node,
16947 char_type_node, char_type_node,
16948 char_type_node, char_type_node,
16949 char_type_node, char_type_node,
16950 char_type_node, char_type_node,
16951 char_type_node, char_type_node,
16952 char_type_node, NULL_TREE);
16953 def_builtin ("__builtin_vec_init_v16qi", ftype,
16954 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
16955
16956 ftype = build_function_type_list (V4SF_type_node, float_type_node,
16957 float_type_node, float_type_node,
16958 float_type_node, NULL_TREE);
16959 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
16960
16961 /* VSX builtins. */
16962 ftype = build_function_type_list (V2DF_type_node, double_type_node,
16963 double_type_node, NULL_TREE);
16964 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
16965
16966 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
16967 intDI_type_node, NULL_TREE);
16968 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
16969
16970 /* Access to the vec_set patterns. */
16971 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
16972 intSI_type_node,
16973 integer_type_node, NULL_TREE);
16974 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
16975
16976 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16977 intHI_type_node,
16978 integer_type_node, NULL_TREE);
16979 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
16980
16981 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
16982 intQI_type_node,
16983 integer_type_node, NULL_TREE);
16984 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
16985
16986 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
16987 float_type_node,
16988 integer_type_node, NULL_TREE);
16989 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
16990
16991 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
16992 double_type_node,
16993 integer_type_node, NULL_TREE);
16994 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
16995
16996 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
16997 intDI_type_node,
16998 integer_type_node, NULL_TREE);
16999 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17000
17001 /* Access to the vec_extract patterns. */
17002 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17003 integer_type_node, NULL_TREE);
17004 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17005
17006 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17007 integer_type_node, NULL_TREE);
17008 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17009
17010 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17011 integer_type_node, NULL_TREE);
17012 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17013
17014 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17015 integer_type_node, NULL_TREE);
17016 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17017
17018 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17019 integer_type_node, NULL_TREE);
17020 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17021
17022 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17023 integer_type_node, NULL_TREE);
17024 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
17025
17026
17027 if (V1TI_type_node)
17028 {
17029 tree v1ti_ftype_long_pcvoid
17030 = build_function_type_list (V1TI_type_node,
17031 long_integer_type_node, pcvoid_type_node,
17032 NULL_TREE);
17033 tree void_ftype_v1ti_long_pvoid
17034 = build_function_type_list (void_type_node,
17035 V1TI_type_node, long_integer_type_node,
17036 pvoid_type_node, NULL_TREE);
17037 def_builtin ("__builtin_vsx_ld_elemrev_v1ti", v1ti_ftype_long_pcvoid,
17038 VSX_BUILTIN_LD_ELEMREV_V1TI);
17039 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17040 VSX_BUILTIN_LXVD2X_V1TI);
17041 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17042 VSX_BUILTIN_STXVD2X_V1TI);
17043 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17044 NULL_TREE, NULL_TREE);
17045 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17046 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17047 intTI_type_node,
17048 integer_type_node, NULL_TREE);
17049 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17050 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17051 integer_type_node, NULL_TREE);
17052 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17053 }
17054
17055 }
17056
17057 static void
17058 htm_init_builtins (void)
17059 {
17060 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17061 const struct builtin_description *d;
17062 size_t i;
17063
17064 d = bdesc_htm;
17065 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17066 {
17067 tree op[MAX_HTM_OPERANDS], type;
17068 HOST_WIDE_INT mask = d->mask;
17069 unsigned attr = rs6000_builtin_info[d->code].attr;
17070 bool void_func = (attr & RS6000_BTC_VOID);
17071 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17072 int nopnds = 0;
17073 tree gpr_type_node;
17074 tree rettype;
17075 tree argtype;
17076
17077 /* It is expected that these htm built-in functions may have
17078 d->icode equal to CODE_FOR_nothing. */
17079
17080 if (TARGET_32BIT && TARGET_POWERPC64)
17081 gpr_type_node = long_long_unsigned_type_node;
17082 else
17083 gpr_type_node = long_unsigned_type_node;
17084
17085 if (attr & RS6000_BTC_SPR)
17086 {
17087 rettype = gpr_type_node;
17088 argtype = gpr_type_node;
17089 }
17090 else if (d->code == HTM_BUILTIN_TABORTDC
17091 || d->code == HTM_BUILTIN_TABORTDCI)
17092 {
17093 rettype = unsigned_type_node;
17094 argtype = gpr_type_node;
17095 }
17096 else
17097 {
17098 rettype = unsigned_type_node;
17099 argtype = unsigned_type_node;
17100 }
17101
17102 if ((mask & builtin_mask) != mask)
17103 {
17104 if (TARGET_DEBUG_BUILTIN)
17105 fprintf (stderr, "htm_builtin, skip htm %s\n", d->name);
17106 continue;
17107 }
17108
17109 if (d->name == 0)
17110 {
17111 if (TARGET_DEBUG_BUILTIN)
17112 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
17113 (long unsigned) i);
17114 continue;
17115 }
17116
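/* op[0] holds the function's return type (or void); the remaining slots
hold the argument types. */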
17117 op[nopnds++] = (void_func) ? void_type_node : rettype;
17118
17119 if (attr_args == RS6000_BTC_UNARY)
17120 op[nopnds++] = argtype;
17121 else if (attr_args == RS6000_BTC_BINARY)
17122 {
17123 op[nopnds++] = argtype;
17124 op[nopnds++] = argtype;
17125 }
17126 else if (attr_args == RS6000_BTC_TERNARY)
17127 {
17128 op[nopnds++] = argtype;
17129 op[nopnds++] = argtype;
17130 op[nopnds++] = argtype;
17131 }
17132
17133 switch (nopnds)
17134 {
17135 case 1:
17136 type = build_function_type_list (op[0], NULL_TREE);
17137 break;
17138 case 2:
17139 type = build_function_type_list (op[0], op[1], NULL_TREE);
17140 break;
17141 case 3:
17142 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17143 break;
17144 case 4:
17145 type = build_function_type_list (op[0], op[1], op[2], op[3],
17146 NULL_TREE);
17147 break;
17148 default:
17149 gcc_unreachable ();
17150 }
17151
17152 def_builtin (d->name, type, d->code);
17153 }
17154 }
17155
17156 /* Hash function for builtin functions with up to 3 arguments and a return
17157 type. */
17158 hashval_t
17159 builtin_hasher::hash (builtin_hash_struct *bh)
17160 {
17161 unsigned ret = 0;
17162 int i;
17163
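/* Fold each of the four mode/signedness pairs (the return value plus up
to three arguments) into the hash value. */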
17164 for (i = 0; i < 4; i++)
17165 {
17166 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17167 ret = (ret * 2) + bh->uns_p[i];
17168 }
17169
17170 return ret;
17171 }
17172
17173 /* Compare builtin hash entries H1 and H2 for equivalence. */
17174 bool
17175 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17176 {
17177 return ((p1->mode[0] == p2->mode[0])
17178 && (p1->mode[1] == p2->mode[1])
17179 && (p1->mode[2] == p2->mode[2])
17180 && (p1->mode[3] == p2->mode[3])
17181 && (p1->uns_p[0] == p2->uns_p[0])
17182 && (p1->uns_p[1] == p2->uns_p[1])
17183 && (p1->uns_p[2] == p2->uns_p[2])
17184 && (p1->uns_p[3] == p2->uns_p[3]));
17185 }
17186
17187 /* Map types for builtin functions with an explicit return type and up to 3
17188 arguments. Functions with fewer than 3 arguments use VOIDmode for the
17189 unused argument slots. */
17190 static tree
17191 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17192 machine_mode mode_arg1, machine_mode mode_arg2,
17193 enum rs6000_builtins builtin, const char *name)
17194 {
17195 struct builtin_hash_struct h;
17196 struct builtin_hash_struct *h2;
17197 int num_args = 3;
17198 int i;
17199 tree ret_type = NULL_TREE;
17200 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17201
17202 /* Create builtin_hash_table. */
17203 if (builtin_hash_table == NULL)
17204 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
17205
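/* Slot 0 of the mode and signedness arrays describes the return value;
slots 1 through 3 describe the arguments. */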
17206 h.type = NULL_TREE;
17207 h.mode[0] = mode_ret;
17208 h.mode[1] = mode_arg0;
17209 h.mode[2] = mode_arg1;
17210 h.mode[3] = mode_arg2;
17211 h.uns_p[0] = 0;
17212 h.uns_p[1] = 0;
17213 h.uns_p[2] = 0;
17214 h.uns_p[3] = 0;
17215
17216 /* If the builtin produces unsigned results or takes unsigned arguments,
17217 and it may be returned as a decl to the vectorizer (such as widening
17218 multiplies, permute), make sure the arguments and return value have
17219 the correct signedness. */
17220 switch (builtin)
17221 {
17222 /* unsigned 1 argument functions. */
17223 case CRYPTO_BUILTIN_VSBOX:
17224 case CRYPTO_BUILTIN_VSBOX_BE:
17225 case P8V_BUILTIN_VGBBD:
17226 case MISC_BUILTIN_CDTBCD:
17227 case MISC_BUILTIN_CBCDTD:
17228 h.uns_p[0] = 1;
17229 h.uns_p[1] = 1;
17230 break;
17231
17232 /* unsigned 2 argument functions. */
17233 case ALTIVEC_BUILTIN_VMULEUB:
17234 case ALTIVEC_BUILTIN_VMULEUH:
17235 case P8V_BUILTIN_VMULEUW:
17236 case ALTIVEC_BUILTIN_VMULOUB:
17237 case ALTIVEC_BUILTIN_VMULOUH:
17238 case P8V_BUILTIN_VMULOUW:
17239 case CRYPTO_BUILTIN_VCIPHER:
17240 case CRYPTO_BUILTIN_VCIPHER_BE:
17241 case CRYPTO_BUILTIN_VCIPHERLAST:
17242 case CRYPTO_BUILTIN_VCIPHERLAST_BE:
17243 case CRYPTO_BUILTIN_VNCIPHER:
17244 case CRYPTO_BUILTIN_VNCIPHER_BE:
17245 case CRYPTO_BUILTIN_VNCIPHERLAST:
17246 case CRYPTO_BUILTIN_VNCIPHERLAST_BE:
17247 case CRYPTO_BUILTIN_VPMSUMB:
17248 case CRYPTO_BUILTIN_VPMSUMH:
17249 case CRYPTO_BUILTIN_VPMSUMW:
17250 case CRYPTO_BUILTIN_VPMSUMD:
17251 case CRYPTO_BUILTIN_VPMSUM:
17252 case MISC_BUILTIN_ADDG6S:
17253 case MISC_BUILTIN_DIVWEU:
17254 case MISC_BUILTIN_DIVDEU:
17255 case VSX_BUILTIN_UDIV_V2DI:
17256 case ALTIVEC_BUILTIN_VMAXUB:
17257 case ALTIVEC_BUILTIN_VMINUB:
17258 case ALTIVEC_BUILTIN_VMAXUH:
17259 case ALTIVEC_BUILTIN_VMINUH:
17260 case ALTIVEC_BUILTIN_VMAXUW:
17261 case ALTIVEC_BUILTIN_VMINUW:
17262 case P8V_BUILTIN_VMAXUD:
17263 case P8V_BUILTIN_VMINUD:
17264 h.uns_p[0] = 1;
17265 h.uns_p[1] = 1;
17266 h.uns_p[2] = 1;
17267 break;
17268
17269 /* unsigned 3 argument functions. */
17270 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17271 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17272 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17273 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17274 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17275 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17276 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17277 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17278 case VSX_BUILTIN_VPERM_16QI_UNS:
17279 case VSX_BUILTIN_VPERM_8HI_UNS:
17280 case VSX_BUILTIN_VPERM_4SI_UNS:
17281 case VSX_BUILTIN_VPERM_2DI_UNS:
17282 case VSX_BUILTIN_XXSEL_16QI_UNS:
17283 case VSX_BUILTIN_XXSEL_8HI_UNS:
17284 case VSX_BUILTIN_XXSEL_4SI_UNS:
17285 case VSX_BUILTIN_XXSEL_2DI_UNS:
17286 case CRYPTO_BUILTIN_VPERMXOR:
17287 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17288 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17289 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17290 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17291 case CRYPTO_BUILTIN_VSHASIGMAW:
17292 case CRYPTO_BUILTIN_VSHASIGMAD:
17293 case CRYPTO_BUILTIN_VSHASIGMA:
17294 h.uns_p[0] = 1;
17295 h.uns_p[1] = 1;
17296 h.uns_p[2] = 1;
17297 h.uns_p[3] = 1;
17298 break;
17299
17300 /* signed permute functions with unsigned char mask. */
17301 case ALTIVEC_BUILTIN_VPERM_16QI:
17302 case ALTIVEC_BUILTIN_VPERM_8HI:
17303 case ALTIVEC_BUILTIN_VPERM_4SI:
17304 case ALTIVEC_BUILTIN_VPERM_4SF:
17305 case ALTIVEC_BUILTIN_VPERM_2DI:
17306 case ALTIVEC_BUILTIN_VPERM_2DF:
17307 case VSX_BUILTIN_VPERM_16QI:
17308 case VSX_BUILTIN_VPERM_8HI:
17309 case VSX_BUILTIN_VPERM_4SI:
17310 case VSX_BUILTIN_VPERM_4SF:
17311 case VSX_BUILTIN_VPERM_2DI:
17312 case VSX_BUILTIN_VPERM_2DF:
17313 h.uns_p[3] = 1;
17314 break;
17315
17316 /* unsigned args, signed return. */
17317 case VSX_BUILTIN_XVCVUXDSP:
17318 case VSX_BUILTIN_XVCVUXDDP_UNS:
17319 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17320 h.uns_p[1] = 1;
17321 break;
17322
17323 /* signed args, unsigned return. */
17324 case VSX_BUILTIN_XVCVDPUXDS_UNS:
17325 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
17326 case MISC_BUILTIN_UNPACK_TD:
17327 case MISC_BUILTIN_UNPACK_V1TI:
17328 h.uns_p[0] = 1;
17329 break;
17330
17331 /* unsigned arguments, bool return (compares). */
17332 case ALTIVEC_BUILTIN_VCMPEQUB:
17333 case ALTIVEC_BUILTIN_VCMPEQUH:
17334 case ALTIVEC_BUILTIN_VCMPEQUW:
17335 case P8V_BUILTIN_VCMPEQUD:
17336 case VSX_BUILTIN_CMPGE_U16QI:
17337 case VSX_BUILTIN_CMPGE_U8HI:
17338 case VSX_BUILTIN_CMPGE_U4SI:
17339 case VSX_BUILTIN_CMPGE_U2DI:
17340 case ALTIVEC_BUILTIN_VCMPGTUB:
17341 case ALTIVEC_BUILTIN_VCMPGTUH:
17342 case ALTIVEC_BUILTIN_VCMPGTUW:
17343 case P8V_BUILTIN_VCMPGTUD:
17344 h.uns_p[1] = 1;
17345 h.uns_p[2] = 1;
17346 break;
17347
17348 /* unsigned arguments for 128-bit pack instructions. */
17349 case MISC_BUILTIN_PACK_TD:
17350 case MISC_BUILTIN_PACK_V1TI:
17351 h.uns_p[1] = 1;
17352 h.uns_p[2] = 1;
17353 break;
17354
17355 /* unsigned second arguments (vector shift right). */
17356 case ALTIVEC_BUILTIN_VSRB:
17357 case ALTIVEC_BUILTIN_VSRH:
17358 case ALTIVEC_BUILTIN_VSRW:
17359 case P8V_BUILTIN_VSRD:
17360 h.uns_p[2] = 1;
17361 break;
17362
17363 default:
17364 break;
17365 }
17366
17367 /* Figure out how many args are present. */
17368 while (num_args > 0 && h.mode[num_args] == VOIDmode)
17369 num_args--;
17370
17371 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
17372 if (!ret_type && h.uns_p[0])
17373 ret_type = builtin_mode_to_type[h.mode[0]][0];
17374
17375 if (!ret_type)
17376 fatal_error (input_location,
17377 "internal error: builtin function %qs had an unexpected "
17378 "return type %qs", name, GET_MODE_NAME (h.mode[0]));
17379
17380 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
17381 arg_type[i] = NULL_TREE;
17382
17383 for (i = 0; i < num_args; i++)
17384 {
17385 int m = (int) h.mode[i+1];
17386 int uns_p = h.uns_p[i+1];
17387
17388 arg_type[i] = builtin_mode_to_type[m][uns_p];
17389 if (!arg_type[i] && uns_p)
17390 arg_type[i] = builtin_mode_to_type[m][0];
17391
17392 if (!arg_type[i])
17393 fatal_error (input_location,
17394 "internal error: builtin function %qs, argument %d "
17395 "had unexpected argument type %qs", name, i,
17396 GET_MODE_NAME (m));
17397 }
17398
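/* Look up this signature in the hash table; build and cache a new
FUNCTION_TYPE node only if it has not been seen before. */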
17399 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
17400 if (*found == NULL)
17401 {
17402 h2 = ggc_alloc<builtin_hash_struct> ();
17403 *h2 = h;
17404 *found = h2;
17405
17406 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
17407 arg_type[2], NULL_TREE);
17408 }
17409
17410 return (*found)->type;
17411 }
17412
17413 static void
17414 rs6000_common_init_builtins (void)
17415 {
17416 const struct builtin_description *d;
17417 size_t i;
17418
17419 tree opaque_ftype_opaque = NULL_TREE;
17420 tree opaque_ftype_opaque_opaque = NULL_TREE;
17421 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
17422 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17423
17424 /* Create Altivec and VSX builtins on machines with at least the
17425 general purpose extensions (970 and newer) to allow the use of
17426 the target attribute. */
17427
17428 if (TARGET_EXTRA_BUILTINS)
17429 builtin_mask |= RS6000_BTM_COMMON;
17430
17431 /* Add the ternary operators. */
17432 d = bdesc_3arg;
17433 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17434 {
17435 tree type;
17436 HOST_WIDE_INT mask = d->mask;
17437
17438 if ((mask & builtin_mask) != mask)
17439 {
17440 if (TARGET_DEBUG_BUILTIN)
17441 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
17442 continue;
17443 }
17444
17445 if (rs6000_overloaded_builtin_p (d->code))
17446 {
17447 if (! (type = opaque_ftype_opaque_opaque_opaque))
17448 type = opaque_ftype_opaque_opaque_opaque
17449 = build_function_type_list (opaque_V4SI_type_node,
17450 opaque_V4SI_type_node,
17451 opaque_V4SI_type_node,
17452 opaque_V4SI_type_node,
17453 NULL_TREE);
17454 }
17455 else
17456 {
17457 enum insn_code icode = d->icode;
17458 if (d->name == 0)
17459 {
17460 if (TARGET_DEBUG_BUILTIN)
17461 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
17462 (long unsigned) i);
17463
17464 continue;
17465 }
17466
17467 if (icode == CODE_FOR_nothing)
17468 {
17469 if (TARGET_DEBUG_BUILTIN)
17470 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
17471 d->name);
17472
17473 continue;
17474 }
17475
17476 type = builtin_function_type (insn_data[icode].operand[0].mode,
17477 insn_data[icode].operand[1].mode,
17478 insn_data[icode].operand[2].mode,
17479 insn_data[icode].operand[3].mode,
17480 d->code, d->name);
17481 }
17482
17483 def_builtin (d->name, type, d->code);
17484 }
17485
17486 /* Add the binary operators. */
17487 d = bdesc_2arg;
17488 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17489 {
17490 machine_mode mode0, mode1, mode2;
17491 tree type;
17492 HOST_WIDE_INT mask = d->mask;
17493
17494 if ((mask & builtin_mask) != mask)
17495 {
17496 if (TARGET_DEBUG_BUILTIN)
17497 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
17498 continue;
17499 }
17500
17501 if (rs6000_overloaded_builtin_p (d->code))
17502 {
17503 if (! (type = opaque_ftype_opaque_opaque))
17504 type = opaque_ftype_opaque_opaque
17505 = build_function_type_list (opaque_V4SI_type_node,
17506 opaque_V4SI_type_node,
17507 opaque_V4SI_type_node,
17508 NULL_TREE);
17509 }
17510 else
17511 {
17512 enum insn_code icode = d->icode;
17513 if (d->name == 0)
17514 {
17515 if (TARGET_DEBUG_BUILTIN)
17516 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
17517 (long unsigned) i);
17518
17519 continue;
17520 }
17521
17522 if (icode == CODE_FOR_nothing)
17523 {
17524 if (TARGET_DEBUG_BUILTIN)
17525 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
17526 d->name);
17527
17528 continue;
17529 }
17530
17531 mode0 = insn_data[icode].operand[0].mode;
17532 mode1 = insn_data[icode].operand[1].mode;
17533 mode2 = insn_data[icode].operand[2].mode;
17534
17535 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
17536 d->code, d->name);
17537 }
17538
17539 def_builtin (d->name, type, d->code);
17540 }
17541
17542 /* Add the simple unary operators. */
17543 d = bdesc_1arg;
17544 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17545 {
17546 machine_mode mode0, mode1;
17547 tree type;
17548 HOST_WIDE_INT mask = d->mask;
17549
17550 if ((mask & builtin_mask) != mask)
17551 {
17552 if (TARGET_DEBUG_BUILTIN)
17553 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
17554 continue;
17555 }
17556
17557 if (rs6000_overloaded_builtin_p (d->code))
17558 {
17559 if (! (type = opaque_ftype_opaque))
17560 type = opaque_ftype_opaque
17561 = build_function_type_list (opaque_V4SI_type_node,
17562 opaque_V4SI_type_node,
17563 NULL_TREE);
17564 }
17565 else
17566 {
17567 enum insn_code icode = d->icode;
17568 if (d->name == 0)
17569 {
17570 if (TARGET_DEBUG_BUILTIN)
17571 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
17572 (long unsigned) i);
17573
17574 continue;
17575 }
17576
17577 if (icode == CODE_FOR_nothing)
17578 {
17579 if (TARGET_DEBUG_BUILTIN)
17580 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
17581 d->name);
17582
17583 continue;
17584 }
17585
17586 mode0 = insn_data[icode].operand[0].mode;
17587 mode1 = insn_data[icode].operand[1].mode;
17588
17589 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
17590 d->code, d->name);
17591 }
17592
17593 def_builtin (d->name, type, d->code);
17594 }
17595
17596 /* Add the simple no-argument operators. */
17597 d = bdesc_0arg;
17598 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17599 {
17600 machine_mode mode0;
17601 tree type;
17602 HOST_WIDE_INT mask = d->mask;
17603
17604 if ((mask & builtin_mask) != mask)
17605 {
17606 if (TARGET_DEBUG_BUILTIN)
17607 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
17608 continue;
17609 }
17610 if (rs6000_overloaded_builtin_p (d->code))
17611 {
17612 if (!opaque_ftype_opaque)
17613 opaque_ftype_opaque
17614 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
17615 type = opaque_ftype_opaque;
17616 }
17617 else
17618 {
17619 enum insn_code icode = d->icode;
17620 if (d->name == 0)
17621 {
17622 if (TARGET_DEBUG_BUILTIN)
17623 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
17624 (long unsigned) i);
17625 continue;
17626 }
17627 if (icode == CODE_FOR_nothing)
17628 {
17629 if (TARGET_DEBUG_BUILTIN)
17630 fprintf (stderr,
17631 "rs6000_builtin, skip no-argument %s (no code)\n",
17632 d->name);
17633 continue;
17634 }
17635 mode0 = insn_data[icode].operand[0].mode;
17636 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
17637 d->code, d->name);
17638 }
17639 def_builtin (d->name, type, d->code);
17640 }
17641 }
17642
17643 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
17644 static void
17645 init_float128_ibm (machine_mode mode)
17646 {
17647 if (!TARGET_XL_COMPAT)
17648 {
17649 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
17650 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
17651 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
17652 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
17653
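/* With software floating point, the comparison and conversion routines
must also go through libgcc calls. */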
17654 if (!TARGET_HARD_FLOAT)
17655 {
17656 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
17657 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
17658 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
17659 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
17660 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
17661 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
17662 set_optab_libfunc (le_optab, mode, "__gcc_qle");
17663 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
17664
17665 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
17666 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
17667 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
17668 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
17669 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
17670 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
17671 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
17672 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
17673 }
17674 }
17675 else
17676 {
17677 set_optab_libfunc (add_optab, mode, "_xlqadd");
17678 set_optab_libfunc (sub_optab, mode, "_xlqsub");
17679 set_optab_libfunc (smul_optab, mode, "_xlqmul");
17680 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
17681 }
17682
17683 /* Add various conversions for IFmode to use the traditional TFmode
17684 names. */
17685 if (mode == IFmode)
17686 {
17687 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
17688 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
17689 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
17690 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
17691 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
17692 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
17693
17694 if (TARGET_POWERPC64)
17695 {
17696 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
17697 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
17698 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
17699 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
17700 }
17701 }
17702 }
17703
17704 /* Create a decl for either complex long double multiply or complex long double
17705 divide when long double is IEEE 128-bit floating point. We can't use
17706 __multc3 and __divtc3 because the original long double using IBM extended
17707 double used those names. The complex multiply/divide functions are encoded
17708 as builtin functions with a complex result and 4 scalar inputs. */
17709
17710 static void
17711 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
17712 {
17713 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
17714 name, NULL_TREE);
17715
17716 set_builtin_decl (fncode, fndecl, true);
17717
17718 if (TARGET_DEBUG_BUILTIN)
17719 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
17722 }
17723
17724 /* Set up IEEE 128-bit floating point routines. Use different names if the
17725 arguments can be passed in a vector register. The historical PowerPC
17726 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
17727 continue to use that if we aren't using vector registers to pass IEEE
17728 128-bit floating point. */
17729
17730 static void
17731 init_float128_ieee (machine_mode mode)
17732 {
17733 if (FLOAT128_VECTOR_P (mode))
17734 {
17735 static bool complex_muldiv_init_p = false;
17736
17737 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
17738 we have clone or target attributes, this will be called a second
17739 time. We want to create the built-in function only once. */
17740 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
17741 {
17742 complex_muldiv_init_p = true;
17743 built_in_function fncode_mul =
17744 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
17745 - MIN_MODE_COMPLEX_FLOAT);
17746 built_in_function fncode_div =
17747 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
17748 - MIN_MODE_COMPLEX_FLOAT);
17749
17750 tree fntype = build_function_type_list (complex_long_double_type_node,
17751 long_double_type_node,
17752 long_double_type_node,
17753 long_double_type_node,
17754 long_double_type_node,
17755 NULL_TREE);
17756
17757 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
17758 create_complex_muldiv ("__divkc3", fncode_div, fntype);
17759 }
17760
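/* The "kf" in these libgcc routine names comes from KFmode, the explicit
IEEE 128-bit floating point mode. */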
17761 set_optab_libfunc (add_optab, mode, "__addkf3");
17762 set_optab_libfunc (sub_optab, mode, "__subkf3");
17763 set_optab_libfunc (neg_optab, mode, "__negkf2");
17764 set_optab_libfunc (smul_optab, mode, "__mulkf3");
17765 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
17766 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
17767 set_optab_libfunc (abs_optab, mode, "__abskf2");
17768 set_optab_libfunc (powi_optab, mode, "__powikf2");
17769
17770 set_optab_libfunc (eq_optab, mode, "__eqkf2");
17771 set_optab_libfunc (ne_optab, mode, "__nekf2");
17772 set_optab_libfunc (gt_optab, mode, "__gtkf2");
17773 set_optab_libfunc (ge_optab, mode, "__gekf2");
17774 set_optab_libfunc (lt_optab, mode, "__ltkf2");
17775 set_optab_libfunc (le_optab, mode, "__lekf2");
17776 set_optab_libfunc (unord_optab, mode, "__unordkf2");
17777
17778 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
17779 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
17780 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
17781 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
17782
17783 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
17784 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17785 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
17786
17787 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
17788 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17789 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
17790
17791 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
17792 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
17793 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
17794 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
17795 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
17796 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
17797
17798 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
17799 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
17800 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
17801 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
17802
17803 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
17804 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
17805 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
17806 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
17807
17808 if (TARGET_POWERPC64)
17809 {
17810 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
17811 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
17812 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
17813 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
17814 }
17815 }
17816
17817 else
17818 {
17819 set_optab_libfunc (add_optab, mode, "_q_add");
17820 set_optab_libfunc (sub_optab, mode, "_q_sub");
17821 set_optab_libfunc (neg_optab, mode, "_q_neg");
17822 set_optab_libfunc (smul_optab, mode, "_q_mul");
17823 set_optab_libfunc (sdiv_optab, mode, "_q_div");
17824 if (TARGET_PPC_GPOPT)
17825 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
17826
17827 set_optab_libfunc (eq_optab, mode, "_q_feq");
17828 set_optab_libfunc (ne_optab, mode, "_q_fne");
17829 set_optab_libfunc (gt_optab, mode, "_q_fgt");
17830 set_optab_libfunc (ge_optab, mode, "_q_fge");
17831 set_optab_libfunc (lt_optab, mode, "_q_flt");
17832 set_optab_libfunc (le_optab, mode, "_q_fle");
17833
17834 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
17835 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
17836 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
17837 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
17838 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
17839 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
17840 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
17841 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
17842 }
17843 }
17844
17845 static void
17846 rs6000_init_libfuncs (void)
17847 {
17848 /* __float128 support. */
17849 if (TARGET_FLOAT128_TYPE)
17850 {
17851 init_float128_ibm (IFmode);
17852 init_float128_ieee (KFmode);
17853 }
17854
17855 /* AIX/Darwin/64-bit Linux quad floating point routines. */
17856 if (TARGET_LONG_DOUBLE_128)
17857 {
17858 if (!TARGET_IEEEQUAD)
17859 init_float128_ibm (TFmode);
17860
17861 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
17862 else
17863 init_float128_ieee (TFmode);
17864 }
17865 }
17866
17867 /* Emit a potentially record-form instruction, setting DST from SRC.
17868 If DOT is 0, that is all; otherwise, set CCREG to the result of the
17869 signed comparison of DST with zero. If DOT is 1, the generated RTL
17870 doesn't care about the DST result; if DOT is 2, it does. If CCREG
17871 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
17872 a separate COMPARE. */
17873
17874 void
17875 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
17876 {
17877 if (dot == 0)
17878 {
17879 emit_move_insn (dst, src);
17880 return;
17881 }
17882
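/* Record-form ("dot") instructions set CR0 only, so if CCREG is some
other condition register we must emit a separate compare. */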
17883 if (cc_reg_not_cr0_operand (ccreg, CCmode))
17884 {
17885 emit_move_insn (dst, src);
17886 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
17887 return;
17888 }
17889
17890 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
17891 if (dot == 1)
17892 {
17893 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
17894 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
17895 }
17896 else
17897 {
17898 rtx set = gen_rtx_SET (dst, src);
17899 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
17900 }
17901 }
17902
17903 \f
17904 /* A validation routine: say whether CODE, a condition code, and MODE
17905 match. The other alternatives either don't make sense or should
17906 never be generated. */
17907
17908 void
17909 validate_condition_mode (enum rtx_code code, machine_mode mode)
17910 {
17911 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
17912 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
17913 && GET_MODE_CLASS (mode) == MODE_CC);
17914
17915 /* These don't make sense. */
17916 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
17917 || mode != CCUNSmode);
17918
17919 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
17920 || mode == CCUNSmode);
17921
17922 gcc_assert (mode == CCFPmode
17923 || (code != ORDERED && code != UNORDERED
17924 && code != UNEQ && code != LTGT
17925 && code != UNGT && code != UNLT
17926 && code != UNGE && code != UNLE));
17927
17928 /* These should never be generated except for
17929 flag_finite_math_only. */
17930 gcc_assert (mode != CCFPmode
17931 || flag_finite_math_only
17932 || (code != LE && code != GE
17933 && code != UNEQ && code != LTGT
17934 && code != UNGT && code != UNLT));
17935
17936 /* These are invalid; the information is not there. */
17937 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
17938 }
17939
17940 \f
17941 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
17942 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
17943 not zero, store there the bit offset (counted from the right) where
17944 the single stretch of 1 bits begins; and similarly for B, the bit
17945 offset where it ends. */
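/* For example, the SImode mask 0x0ffffff0 has its stretch of ones
beginning at bit 4 and ending at bit 27, so E would be set to 4 and B
to 27. Masks that wrap around, such as 0xf000000f (B = 3, E = 28), are
also accepted. */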
17946
17947 bool
17948 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
17949 {
17950 unsigned HOST_WIDE_INT val = INTVAL (mask);
17951 unsigned HOST_WIDE_INT bit;
17952 int nb, ne;
17953 int n = GET_MODE_PRECISION (mode);
17954
17955 if (mode != DImode && mode != SImode)
17956 return false;
17957
17958 if (INTVAL (mask) >= 0)
17959 {
17960 bit = val & -val;
17961 ne = exact_log2 (bit);
17962 nb = exact_log2 (val + bit);
17963 }
17964 else if (val + 1 == 0)
17965 {
17966 nb = n;
17967 ne = 0;
17968 }
17969 else if (val & 1)
17970 {
17971 val = ~val;
17972 bit = val & -val;
17973 nb = exact_log2 (bit);
17974 ne = exact_log2 (val + bit);
17975 }
17976 else
17977 {
17978 bit = val & -val;
17979 ne = exact_log2 (bit);
17980 if (val + bit == 0)
17981 nb = n;
17982 else
17983 nb = 0;
17984 }
17985
17986 nb--;
17987
17988 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
17989 return false;
17990
17991 if (b)
17992 *b = nb;
17993 if (e)
17994 *e = ne;
17995
17996 return true;
17997 }
17998
17999 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18000 or rldicr instruction, to implement an AND with it in mode MODE. */
18001
18002 bool
18003 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
18004 {
18005 int nb, ne;
18006
18007 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18008 return false;
18009
18010 /* For DImode, we need a rldicl (mask reaching down to bit 0), a rldicr
18011 (mask reaching up to bit 63), or a rlwinm with a mask in the low 32 bits that does not wrap. */
18012 if (mode == DImode)
18013 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18014
18015 /* For SImode, rlwinm can do everything. */
18016 if (mode == SImode)
18017 return (nb < 32 && ne < 32);
18018
18019 return false;
18020 }
18021
18022 /* Return the instruction template for an AND with mask in mode MODE, with
18023 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18024
18025 const char *
18026 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
18027 {
18028 int nb, ne;
18029
18030 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18031 gcc_unreachable ();
18032
18033 if (mode == DImode && ne == 0)
18034 {
18035 operands[3] = GEN_INT (63 - nb);
18036 if (dot)
18037 return "rldicl. %0,%1,0,%3";
18038 return "rldicl %0,%1,0,%3";
18039 }
18040
18041 if (mode == DImode && nb == 63)
18042 {
18043 operands[3] = GEN_INT (63 - ne);
18044 if (dot)
18045 return "rldicr. %0,%1,0,%3";
18046 return "rldicr %0,%1,0,%3";
18047 }
18048
18049 if (nb < 32 && ne < 32)
18050 {
18051 operands[3] = GEN_INT (31 - nb);
18052 operands[4] = GEN_INT (31 - ne);
18053 if (dot)
18054 return "rlwinm. %0,%1,0,%3,%4";
18055 return "rlwinm %0,%1,0,%3,%4";
18056 }
18057
18058 gcc_unreachable ();
18059 }
18060
18061 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18062 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18063 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
18064
18065 bool
18066 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18067 {
18068 int nb, ne;
18069
18070 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18071 return false;
18072
18073 int n = GET_MODE_PRECISION (mode);
18074 int sh = -1;
18075
18076 if (CONST_INT_P (XEXP (shift, 1)))
18077 {
18078 sh = INTVAL (XEXP (shift, 1));
18079 if (sh < 0 || sh >= n)
18080 return false;
18081 }
18082
18083 rtx_code code = GET_CODE (shift);
18084
18085 /* Convert any shift by 0 to a rotate, to simplify below code. */
18086 if (sh == 0)
18087 code = ROTATE;
18088
18089 /* Convert rotate to simple shift if we can, to make analysis simpler. */
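/* A rotate whose mask lies entirely at or above the rotate count acts as
a plain left shift; one whose mask lies entirely below the rotate count
acts as a logical right shift by n - sh. */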
18090 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18091 code = ASHIFT;
18092 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18093 {
18094 code = LSHIFTRT;
18095 sh = n - sh;
18096 }
18097
18098 /* DImode rotates need rld*. */
18099 if (mode == DImode && code == ROTATE)
18100 return (nb == 63 || ne == 0 || ne == sh);
18101
18102 /* SImode rotates need rlw*. */
18103 if (mode == SImode && code == ROTATE)
18104 return (nb < 32 && ne < 32 && sh < 32);
18105
18106 /* Wrap-around masks are only okay for rotates. */
18107 if (ne > nb)
18108 return false;
18109
18110 /* Variable shifts are only okay for rotates. */
18111 if (sh < 0)
18112 return false;
18113
18114 /* Don't allow ASHIFT if the mask is wrong for that. */
18115 if (code == ASHIFT && ne < sh)
18116 return false;
18117
18118 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18119 if the mask is wrong for that. */
18120 if (nb < 32 && ne < 32 && sh < 32
18121 && !(code == LSHIFTRT && nb >= 32 - sh))
18122 return true;
18123
18124 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18125 if the mask is wrong for that. */
18126 if (code == LSHIFTRT)
18127 sh = 64 - sh;
18128 if (nb == 63 || ne == 0 || ne == sh)
18129 return !(code == LSHIFTRT && nb >= sh);
18130
18131 return false;
18132 }
18133
18134 /* Return the instruction template for a shift with mask in mode MODE, with
18135 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18136
18137 const char *
18138 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18139 {
18140 int nb, ne;
18141
18142 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18143 gcc_unreachable ();
18144
18145 if (mode == DImode && ne == 0)
18146 {
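/* A logical shift right by COUNT is performed as a rotate left by
64 - COUNT, so adjust the shift amount for the rld*cl form. */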
18147 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18148 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
18149 operands[3] = GEN_INT (63 - nb);
18150 if (dot)
18151 return "rld%I2cl. %0,%1,%2,%3";
18152 return "rld%I2cl %0,%1,%2,%3";
18153 }
18154
18155 if (mode == DImode && nb == 63)
18156 {
18157 operands[3] = GEN_INT (63 - ne);
18158 if (dot)
18159 return "rld%I2cr. %0,%1,%2,%3";
18160 return "rld%I2cr %0,%1,%2,%3";
18161 }
18162
18163 if (mode == DImode
18164 && GET_CODE (operands[4]) != LSHIFTRT
18165 && CONST_INT_P (operands[2])
18166 && ne == INTVAL (operands[2]))
18167 {
18168 operands[3] = GEN_INT (63 - nb);
18169 if (dot)
18170 return "rld%I2c. %0,%1,%2,%3";
18171 return "rld%I2c %0,%1,%2,%3";
18172 }
18173
18174 if (nb < 32 && ne < 32)
18175 {
18176 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18177 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18178 operands[3] = GEN_INT (31 - nb);
18179 operands[4] = GEN_INT (31 - ne);
18180 /* This insn can also be a 64-bit rotate with mask that really makes
18181 it just a shift right (with mask); the %h below adjusts for that
18182 situation (the shift count is >= 32 in that case). */
18183 if (dot)
18184 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18185 return "rlw%I2nm %0,%1,%h2,%3,%4";
18186 }
18187
18188 gcc_unreachable ();
18189 }
18190
18191 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
18192 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
18193 ASHIFT, or LSHIFTRT) in mode MODE. */
18194
18195 bool
18196 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
18197 {
18198 int nb, ne;
18199
18200 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18201 return false;
18202
18203 int n = GET_MODE_PRECISION (mode);
18204
18205 int sh = INTVAL (XEXP (shift, 1));
18206 if (sh < 0 || sh >= n)
18207 return false;
18208
18209 rtx_code code = GET_CODE (shift);
18210
18211 /* Convert any shift by 0 to a rotate, to simplify below code. */
18212 if (sh == 0)
18213 code = ROTATE;
18214
18215 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18216 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18217 code = ASHIFT;
18218 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18219 {
18220 code = LSHIFTRT;
18221 sh = n - sh;
18222 }
18223
18224 /* DImode rotates need rldimi. */
18225 if (mode == DImode && code == ROTATE)
18226 return (ne == sh);
18227
18228 /* SImode rotates need rlwimi. */
18229 if (mode == SImode && code == ROTATE)
18230 return (nb < 32 && ne < 32 && sh < 32);
18231
18232 /* Wrap-around masks are only okay for rotates. */
18233 if (ne > nb)
18234 return false;
18235
18236 /* Don't allow ASHIFT if the mask is wrong for that. */
18237 if (code == ASHIFT && ne < sh)
18238 return false;
18239
18240 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
18241 if the mask is wrong for that. */
18242 if (nb < 32 && ne < 32 && sh < 32
18243 && !(code == LSHIFTRT && nb >= 32 - sh))
18244 return true;
18245
18246 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
18247 if the mask is wrong for that. */
18248 if (code == LSHIFTRT)
18249 sh = 64 - sh;
18250 if (ne == sh)
18251 return !(code == LSHIFTRT && nb >= sh);
18252
18253 return false;
18254 }
18255
18256 /* Return the instruction template for an insert with mask in mode MODE, with
18257 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18258
18259 const char *
18260 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
18261 {
18262 int nb, ne;
18263
18264 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18265 gcc_unreachable ();
18266
18267 /* Prefer rldimi because rlwimi is cracked. */
18268 if (TARGET_POWERPC64
18269 && (!dot || mode == DImode)
18270 && GET_CODE (operands[4]) != LSHIFTRT
18271 && ne == INTVAL (operands[2]))
18272 {
18273 operands[3] = GEN_INT (63 - nb);
18274 if (dot)
18275 return "rldimi. %0,%1,%2,%3";
18276 return "rldimi %0,%1,%2,%3";
18277 }
18278
18279 if (nb < 32 && ne < 32)
18280 {
18281 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18282 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18283 operands[3] = GEN_INT (31 - nb);
18284 operands[4] = GEN_INT (31 - ne);
18285 if (dot)
18286 return "rlwimi. %0,%1,%2,%3,%4";
18287 return "rlwimi %0,%1,%2,%3,%4";
18288 }
18289
18290 gcc_unreachable ();
18291 }
18292
18293 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
18294 using two machine instructions. */
18295
18296 bool
18297 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
18298 {
18299 /* There are two kinds of AND we can handle with two insns:
18300 1) those we can do with two rl* insns;
18301 2) ori[s];xori[s].
18302
18303 We do not handle that last case yet. */
18304
18305 /* If there is just one stretch of ones, we can do it. */
18306 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
18307 return true;
18308
18309 /* Otherwise, fill in the lowest "hole"; if we can do the result with
18310 one insn, we can do the whole thing with two. */
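/* For example, VAL = 0xe7 gives bit1 = 0x01, bit2 = 0x08, and
bit3 = 0x20; filling the hole yields 0xe7 + 0x20 - 0x08 = 0xff, which
is one stretch of ones. */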
18311 unsigned HOST_WIDE_INT val = INTVAL (c);
18312 unsigned HOST_WIDE_INT bit1 = val & -val;
18313 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18314 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18315 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18316 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
18317 }
18318
18319 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
18320 If EXPAND is true, split rotate-and-mask instructions we generate to
18321 their constituent parts as well (this is used during expand); if DOT
18322 is 1, make the last insn a record-form instruction clobbering the
18323 destination GPR and setting the CC reg (from operands[3]); if 2, set
18324 that GPR as well as the CC reg. */
18325
18326 void
18327 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
18328 {
18329 gcc_assert (!(expand && dot));
18330
18331 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
18332
18333 /* If it is one stretch of ones, it is DImode; shift left, mask, then
18334 shift right. This generates better code than doing the masks without
18335 shifts, or shifting first right and then left. */
18336 int nb, ne;
18337 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
18338 {
18339 gcc_assert (mode == DImode);
18340
18341 int shift = 63 - nb;
18342 if (expand)
18343 {
18344 rtx tmp1 = gen_reg_rtx (DImode);
18345 rtx tmp2 = gen_reg_rtx (DImode);
18346 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
18347 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
18348 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
18349 }
18350 else
18351 {
18352 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
18353 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
18354 emit_move_insn (operands[0], tmp);
18355 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
18356 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18357 }
18358 return;
18359 }
18360
18361 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
18362 that does the rest. */
18363 unsigned HOST_WIDE_INT bit1 = val & -val;
18364 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18365 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18366 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18367
18368 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
18369 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
18370
18371 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
18372
18373 /* Two "no-rotate"-and-mask instructions, for SImode. */
18374 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
18375 {
18376 gcc_assert (mode == SImode);
18377
18378 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18379 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
18380 emit_move_insn (reg, tmp);
18381 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18382 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18383 return;
18384 }
18385
18386 gcc_assert (mode == DImode);
18387
18388 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
18389 insns; we have to do the first in SImode, because it wraps. */
18390 if (mask2 <= 0xffffffff
18391 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
18392 {
18393 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18394 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
18395 GEN_INT (mask1));
18396 rtx reg_low = gen_lowpart (SImode, reg);
18397 emit_move_insn (reg_low, tmp);
18398 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18399 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18400 return;
18401 }
18402
18403 /* Two rld* insns: rotate, clear the hole in the middle (which now is
18404 at the top end), rotate back and clear the other hole. */
18405 int right = exact_log2 (bit3);
18406 int left = 64 - right;
18407
18408 /* Rotate the mask too. */
18409 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
18410
18411 if (expand)
18412 {
18413 rtx tmp1 = gen_reg_rtx (DImode);
18414 rtx tmp2 = gen_reg_rtx (DImode);
18415 rtx tmp3 = gen_reg_rtx (DImode);
18416 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
18417 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
18418 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
18419 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
18420 }
18421 else
18422 {
18423 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
18424 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
18425 emit_move_insn (operands[0], tmp);
18426 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
18427 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
18428 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18429 }
18430 }
18431 \f
18432 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
18433 for lfq and stfq insns iff the registers are hard registers. */
18434
18435 int
18436 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
18437 {
18438 /* We might have been passed a SUBREG. */
18439 if (!REG_P (reg1) || !REG_P (reg2))
18440 return 0;
18441
18442 /* We might have been passed non-floating-point registers. */
18443 if (!FP_REGNO_P (REGNO (reg1))
18444 || !FP_REGNO_P (REGNO (reg2)))
18445 return 0;
18446
18447 return (REGNO (reg1) == REGNO (reg2) - 1);
18448 }
18449
18450 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
18451 addr1 and addr2 must be in consecutive memory locations
18452 (addr2 == addr1 + 8). */
18453
18454 int
18455 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
18456 {
18457 rtx addr1, addr2;
18458 unsigned int reg1, reg2;
18459 int offset1, offset2;
18460
18461 /* The mems cannot be volatile. */
18462 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
18463 return 0;
18464
18465 addr1 = XEXP (mem1, 0);
18466 addr2 = XEXP (mem2, 0);
18467
18468 /* Extract an offset (if used) from the first addr. */
18469 if (GET_CODE (addr1) == PLUS)
18470 {
18471 /* If not a REG, return zero. */
18472 if (!REG_P (XEXP (addr1, 0)))
18473 return 0;
18474 else
18475 {
18476 reg1 = REGNO (XEXP (addr1, 0));
18477 /* The offset must be constant! */
18478 if (!CONST_INT_P (XEXP (addr1, 1)))
18479 return 0;
18480 offset1 = INTVAL (XEXP (addr1, 1));
18481 }
18482 }
18483 else if (!REG_P (addr1))
18484 return 0;
18485 else
18486 {
18487 reg1 = REGNO (addr1);
18488 /* This was a simple (mem (reg)) expression. Offset is 0. */
18489 offset1 = 0;
18490 }
18491
18492 /* And now for the second addr. */
18493 if (GET_CODE (addr2) == PLUS)
18494 {
18495 /* If not a REG, return zero. */
18496 if (!REG_P (XEXP (addr2, 0)))
18497 return 0;
18498 else
18499 {
18500 reg2 = REGNO (XEXP (addr2, 0));
18501 /* The offset must be constant. */
18502 if (!CONST_INT_P (XEXP (addr2, 1)))
18503 return 0;
18504 offset2 = INTVAL (XEXP (addr2, 1));
18505 }
18506 }
18507 else if (!REG_P (addr2))
18508 return 0;
18509 else
18510 {
18511 reg2 = REGNO (addr2);
18512 /* This was a simple (mem (reg)) expression. Offset is 0. */
18513 offset2 = 0;
18514 }
18515
18516 /* Both of these must have the same base register. */
18517 if (reg1 != reg2)
18518 return 0;
18519
18520 /* The offset for the second addr must be 8 more than the first addr. */
18521 if (offset2 != offset1 + 8)
18522 return 0;
18523
18524 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
18525 instructions. */
18526 return 1;
18527 }
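/* As an illustration (RTL invented for this example), the memory pair

     (mem:DF (plus:DI (reg:DI 3) (const_int 8)))
     (mem:DF (plus:DI (reg:DI 3) (const_int 16)))

   uses the same base register with offsets differing by exactly 8, so
   mems_ok_for_quad_peep returns 1; different base registers, a volatile
   access, or any other offset difference makes it return 0.  */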
18528 \f
18529 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
18530 need to use DDmode, in all other cases we can use the same mode. */
18531 static machine_mode
18532 rs6000_secondary_memory_needed_mode (machine_mode mode)
18533 {
18534 if (lra_in_progress && mode == SDmode)
18535 return DDmode;
18536 return mode;
18537 }
18538
18539 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
18540 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
18541 only work on the traditional altivec registers, note if an altivec register
18542 was chosen. */
18543
18544 static enum rs6000_reg_type
18545 register_to_reg_type (rtx reg, bool *is_altivec)
18546 {
18547 HOST_WIDE_INT regno;
18548 enum reg_class rclass;
18549
18550 if (SUBREG_P (reg))
18551 reg = SUBREG_REG (reg);
18552
18553 if (!REG_P (reg))
18554 return NO_REG_TYPE;
18555
18556 regno = REGNO (reg);
18557 if (!HARD_REGISTER_NUM_P (regno))
18558 {
18559 if (!lra_in_progress && !reload_completed)
18560 return PSEUDO_REG_TYPE;
18561
18562 regno = true_regnum (reg);
18563 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
18564 return PSEUDO_REG_TYPE;
18565 }
18566
18567 gcc_assert (regno >= 0);
18568
18569 if (is_altivec && ALTIVEC_REGNO_P (regno))
18570 *is_altivec = true;
18571
18572 rclass = rs6000_regno_regclass[regno];
18573 return reg_class_to_reg_type[(int)rclass];
18574 }
18575
18576 /* Helper function to return the cost of adding a TOC entry address. */
18577
18578 static inline int
18579 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
18580 {
18581 int ret;
18582
18583 if (TARGET_CMODEL != CMODEL_SMALL)
18584 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
18585
18586 else
18587 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
18588
18589 return ret;
18590 }
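/* Reading the cost table above back out: under the medium and large code
   models the TOC address costs 2 extra insns for register classes that
   support offset addressing and 1 otherwise; under the small code model it
   costs 3, or 6 with -mminimal-toc.  */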
18591
18592 /* Helper function for rs6000_secondary_reload to determine whether the memory
18593 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
18594 needs reloading. Return negative if the memory is not handled by the memory
18595 helper functions and to try a different reload method, 0 if no additional
18596 instructions are need, and positive to give the extra cost for the
18597 memory. */
18598
18599 static int
18600 rs6000_secondary_reload_memory (rtx addr,
18601 enum reg_class rclass,
18602 machine_mode mode)
18603 {
18604 int extra_cost = 0;
18605 rtx reg, and_arg, plus_arg0, plus_arg1;
18606 addr_mask_type addr_mask;
18607 const char *type = NULL;
18608 const char *fail_msg = NULL;
18609
18610 if (GPR_REG_CLASS_P (rclass))
18611 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18612
18613 else if (rclass == FLOAT_REGS)
18614 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18615
18616 else if (rclass == ALTIVEC_REGS)
18617 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18618
18619 /* For the combined VSX_REGS, turn off Altivec AND -16. */
18620 else if (rclass == VSX_REGS)
18621 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
18622 & ~RELOAD_REG_AND_M16);
18623
18624 /* If the register allocator hasn't made up its mind yet on the register
18625 class to use, settle on sensible defaults. */
18626 else if (rclass == NO_REGS)
18627 {
18628 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
18629 & ~RELOAD_REG_AND_M16);
18630
18631 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
18632 addr_mask &= ~(RELOAD_REG_INDEXED
18633 | RELOAD_REG_PRE_INCDEC
18634 | RELOAD_REG_PRE_MODIFY);
18635 }
18636
18637 else
18638 addr_mask = 0;
18639
18640 /* If the register isn't valid in this register class, just return now. */
18641 if ((addr_mask & RELOAD_REG_VALID) == 0)
18642 {
18643 if (TARGET_DEBUG_ADDR)
18644 {
18645 fprintf (stderr,
18646 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18647 "not valid in class\n",
18648 GET_MODE_NAME (mode), reg_class_names[rclass]);
18649 debug_rtx (addr);
18650 }
18651
18652 return -1;
18653 }
18654
18655 switch (GET_CODE (addr))
18656 {
18657 /* Does the register class support auto update forms for this mode? We
18658 don't need a scratch register, since the powerpc only supports
18659 PRE_INC, PRE_DEC, and PRE_MODIFY. */
18660 case PRE_INC:
18661 case PRE_DEC:
18662 reg = XEXP (addr, 0);
18663 if (!base_reg_operand (addr, GET_MODE (reg)))
18664 {
18665 fail_msg = "no base register #1";
18666 extra_cost = -1;
18667 }
18668
18669 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18670 {
18671 extra_cost = 1;
18672 type = "update";
18673 }
18674 break;
18675
18676 case PRE_MODIFY:
18677 reg = XEXP (addr, 0);
18678 plus_arg1 = XEXP (addr, 1);
18679 if (!base_reg_operand (reg, GET_MODE (reg))
18680 || GET_CODE (plus_arg1) != PLUS
18681 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
18682 {
18683 fail_msg = "bad PRE_MODIFY";
18684 extra_cost = -1;
18685 }
18686
18687 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18688 {
18689 extra_cost = 1;
18690 type = "update";
18691 }
18692 break;
18693
18694 /* Do we need to simulate AND -16 to clear the bottom address bits used
18695 in VMX load/stores? Only allow the AND for vector sizes. */
18696 case AND:
18697 and_arg = XEXP (addr, 0);
18698 if (GET_MODE_SIZE (mode) != 16
18699 || !CONST_INT_P (XEXP (addr, 1))
18700 || INTVAL (XEXP (addr, 1)) != -16)
18701 {
18702 fail_msg = "bad Altivec AND #1";
18703 extra_cost = -1;
18704 }
18705
18706 if (rclass != ALTIVEC_REGS)
18707 {
18708 if (legitimate_indirect_address_p (and_arg, false))
18709 extra_cost = 1;
18710
18711 else if (legitimate_indexed_address_p (and_arg, false))
18712 extra_cost = 2;
18713
18714 else
18715 {
18716 fail_msg = "bad Altivec AND #2";
18717 extra_cost = -1;
18718 }
18719
18720 type = "and";
18721 }
18722 break;
18723
18724 /* If this is an indirect address, make sure it is a base register. */
18725 case REG:
18726 case SUBREG:
18727 if (!legitimate_indirect_address_p (addr, false))
18728 {
18729 extra_cost = 1;
18730 type = "move";
18731 }
18732 break;
18733
18734 /* If this is an indexed address, make sure the register class can handle
18735 indexed addresses for this mode. */
18736 case PLUS:
18737 plus_arg0 = XEXP (addr, 0);
18738 plus_arg1 = XEXP (addr, 1);
18739
18740 /* (plus (plus (reg) (constant)) (constant)) is generated during
18741 push_reload processing, so handle it now. */
18742 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
18743 {
18744 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18745 {
18746 extra_cost = 1;
18747 type = "offset";
18748 }
18749 }
18750
18751 /* (plus (plus (reg) (constant)) (reg)) is also generated during
18752 push_reload processing, so handle it now. */
18753 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
18754 {
18755 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18756 {
18757 extra_cost = 1;
18758 type = "indexed #2";
18759 }
18760 }
18761
18762 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
18763 {
18764 fail_msg = "no base register #2";
18765 extra_cost = -1;
18766 }
18767
18768 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
18769 {
18770 if ((addr_mask & RELOAD_REG_INDEXED) == 0
18771 || !legitimate_indexed_address_p (addr, false))
18772 {
18773 extra_cost = 1;
18774 type = "indexed";
18775 }
18776 }
18777
18778 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
18779 && CONST_INT_P (plus_arg1))
18780 {
18781 if (!quad_address_offset_p (INTVAL (plus_arg1)))
18782 {
18783 extra_cost = 1;
18784 type = "vector d-form offset";
18785 }
18786 }
18787
18788 /* Make sure the register class can handle offset addresses. */
18789 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
18790 {
18791 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18792 {
18793 extra_cost = 1;
18794 type = "offset #2";
18795 }
18796 }
18797
18798 else
18799 {
18800 fail_msg = "bad PLUS";
18801 extra_cost = -1;
18802 }
18803
18804 break;
18805
18806 case LO_SUM:
18807 /* Quad offsets are restricted and can't handle normal addresses. */
18808 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18809 {
18810 extra_cost = -1;
18811 type = "vector d-form lo_sum";
18812 }
18813
18814 else if (!legitimate_lo_sum_address_p (mode, addr, false))
18815 {
18816 fail_msg = "bad LO_SUM";
18817 extra_cost = -1;
18818 }
18819
18820 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18821 {
18822 extra_cost = 1;
18823 type = "lo_sum";
18824 }
18825 break;
18826
18827 /* Static addresses need to create a TOC entry. */
18828 case CONST:
18829 case SYMBOL_REF:
18830 case LABEL_REF:
18831 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18832 {
18833 extra_cost = -1;
18834 type = "vector d-form lo_sum #2";
18835 }
18836
18837 else
18838 {
18839 type = "address";
18840 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
18841 }
18842 break;
18843
18844 /* TOC references look like offsetable memory. */
18845 case UNSPEC:
18846 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
18847 {
18848 fail_msg = "bad UNSPEC";
18849 extra_cost = -1;
18850 }
18851
18852 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18853 {
18854 extra_cost = -1;
18855 type = "vector d-form lo_sum #3";
18856 }
18857
18858 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18859 {
18860 extra_cost = 1;
18861 type = "toc reference";
18862 }
18863 break;
18864
18865 default:
18866 {
18867 fail_msg = "bad address";
18868 extra_cost = -1;
18869 }
18870 }
18871
18872 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
18873 {
18874 if (extra_cost < 0)
18875 fprintf (stderr,
18876 "rs6000_secondary_reload_memory error: mode = %s, "
18877 "class = %s, addr_mask = '%s', %s\n",
18878 GET_MODE_NAME (mode),
18879 reg_class_names[rclass],
18880 rs6000_debug_addr_mask (addr_mask, false),
18881 (fail_msg != NULL) ? fail_msg : "<bad address>");
18882
18883 else
18884 fprintf (stderr,
18885 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18886 "addr_mask = '%s', extra cost = %d, %s\n",
18887 GET_MODE_NAME (mode),
18888 reg_class_names[rclass],
18889 rs6000_debug_addr_mask (addr_mask, false),
18890 extra_cost,
18891 (type) ? type : "<none>");
18892
18893 debug_rtx (addr);
18894 }
18895
18896 return extra_cost;
18897 }
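/* Example of how the result is used (address invented for illustration):
   reloading a V4SImode access through (pre_inc:DI (reg:DI 3)) for a class
   without RELOAD_REG_PRE_INCDEC costs 1 extra insn (the explicit base
   update), whereas an address the helpers cannot fix up at all yields -1,
   telling rs6000_secondary_reload to fall back to the default handling.  */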
18898
18899 /* Helper function for rs6000_secondary_reload to return true if a move to a
18900 different register class is really a simple move. */
18901
18902 static bool
18903 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
18904 enum rs6000_reg_type from_type,
18905 machine_mode mode)
18906 {
18907 int size = GET_MODE_SIZE (mode);
18908
18909 /* Add support for various direct moves available. In this function, we only
18910 look at cases where we don't need any extra registers, and one or more
18911 simple move insns are issued. Historically, small integers were not allowed
18912 in FPR/VSX registers. Single precision binary floating point is not a simple
18913 move because we need to convert to the single precision memory layout.
18914 The 4-byte SDmode can be moved. TDmode values are disallowed since they
18915 need special direct move handling, which we do not support yet. */
18916 if (TARGET_DIRECT_MOVE
18917 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18918 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
18919 {
18920 if (TARGET_POWERPC64)
18921 {
18922 /* ISA 2.07: MTVSRD or MFVSRD. */
18923 if (size == 8)
18924 return true;
18925
18926 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
18927 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
18928 return true;
18929 }
18930
18931 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
18932 if (TARGET_P8_VECTOR)
18933 {
18934 if (mode == SImode)
18935 return true;
18936
18937 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
18938 return true;
18939 }
18940
18941 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
18942 if (mode == SDmode)
18943 return true;
18944 }
18945
18946 /* Move to/from SPR. */
18947 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
18948 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
18949 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
18950 return true;
18951
18952 return false;
18953 }
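/* For instance (assuming the relevant ISA flags are on), a DImode move
   between a GPR and a VSX register on a 64-bit target is a single
   mtvsrd/mfvsrd and so counts as simple, while an SFmode move is not
   simple, since it also needs the xscvdpspn/xscvspdpn format conversion
   and is handled by rs6000_secondary_reload_direct_move instead.  */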
18954
18955 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
18956 special direct moves that involve allocating an extra register. Return
18957 true if such a move exists, recording the insn code of the helper function
18958 (or CODE_FOR_nothing) and its extra cost in SRI. */
18959
18960 static bool
18961 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
18962 enum rs6000_reg_type from_type,
18963 machine_mode mode,
18964 secondary_reload_info *sri,
18965 bool altivec_p)
18966 {
18967 bool ret = false;
18968 enum insn_code icode = CODE_FOR_nothing;
18969 int cost = 0;
18970 int size = GET_MODE_SIZE (mode);
18971
18972 if (TARGET_POWERPC64 && size == 16)
18973 {
18974 /* Handle moving 128-bit values from GPRs to VSX registers on
18975 ISA 2.07 (power8, power9) when running in 64-bit mode using
18976 XXPERMDI to glue the two 64-bit values back together. */
18977 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18978 {
18979 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
18980 icode = reg_addr[mode].reload_vsx_gpr;
18981 }
18982
18983 /* Handle moving 128-bit values from VSX registers to GPRs on
18984 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
18985 bottom 64-bit value. */
18986 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18987 {
18988 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
18989 icode = reg_addr[mode].reload_gpr_vsx;
18990 }
18991 }
18992
18993 else if (TARGET_POWERPC64 && mode == SFmode)
18994 {
18995 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18996 {
18997 cost = 3; /* xscvdpspn, mfvsrd, and. */
18998 icode = reg_addr[mode].reload_gpr_vsx;
18999 }
19000
19001 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19002 {
19003 cost = 2; /* mtvsrz, xscvspdpn. */
19004 icode = reg_addr[mode].reload_vsx_gpr;
19005 }
19006 }
19007
19008 else if (!TARGET_POWERPC64 && size == 8)
19009 {
19010 /* Handle moving 64-bit values from GPRs to floating point registers on
19011 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19012 32-bit values back together. Altivec register classes must be handled
19013 specially since a different instruction is used, and the secondary
19014 reload support requires a single instruction class in the scratch
19015 register constraint. However, right now TFmode is not allowed in
19016 Altivec registers, so the pattern will never match. */
19017 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
19018 {
19019 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
19020 icode = reg_addr[mode].reload_fpr_gpr;
19021 }
19022 }
19023
19024 if (icode != CODE_FOR_nothing)
19025 {
19026 ret = true;
19027 if (sri)
19028 {
19029 sri->icode = icode;
19030 sri->extra_cost = cost;
19031 }
19032 }
19033
19034 return ret;
19035 }
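/* A sketch of the first case above (register numbers invented for
   illustration): moving a 128-bit value held in r4:r5 into a VSX register
   on a 64-bit ISA 2.07 target ends up as something like

	mtvsrd	 0,4
	mtvsrd	 1,5
	xxpermdi 34,0,1,0

   i.e. two mtvsrd's plus one xxpermdi, matching the recorded cost of 3.  */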
19036
19037 /* Return whether a move between two register classes can be done either
19038 directly (simple move) or via a pattern that uses a single extra temporary
19039 (using ISA 2.07's direct move in this case). */
19040
19041 static bool
19042 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
19043 enum rs6000_reg_type from_type,
19044 machine_mode mode,
19045 secondary_reload_info *sri,
19046 bool altivec_p)
19047 {
19048 /* Fall back to load/store reloads if either type is not a register. */
19049 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
19050 return false;
19051
19052 /* If we haven't allocated registers yet, assume the move can be done for the
19053 standard register types. */
19054 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
19055 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
19056 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
19057 return true;
19058
19059 /* A move within the same set of registers is a simple move for non-specialized
19060 registers. */
19061 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
19062 return true;
19063
19064 /* Check whether a simple move can be done directly. */
19065 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
19066 {
19067 if (sri)
19068 {
19069 sri->icode = CODE_FOR_nothing;
19070 sri->extra_cost = 0;
19071 }
19072 return true;
19073 }
19074
19075 /* Now check if we can do it in a few steps. */
19076 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
19077 altivec_p);
19078 }
19079
19080 /* Inform reload about cases where moving X with a mode MODE to a register in
19081 RCLASS requires an extra scratch or immediate register. Return the class
19082 needed for the immediate register.
19083
19084 For VSX and Altivec, we may need a register to convert sp+offset into
19085 reg+sp.
19086
19087 For misaligned 64-bit gpr loads and stores we need a register to
19088 convert an offset address to indirect. */
19089
19090 static reg_class_t
19091 rs6000_secondary_reload (bool in_p,
19092 rtx x,
19093 reg_class_t rclass_i,
19094 machine_mode mode,
19095 secondary_reload_info *sri)
19096 {
19097 enum reg_class rclass = (enum reg_class) rclass_i;
19098 reg_class_t ret = ALL_REGS;
19099 enum insn_code icode;
19100 bool default_p = false;
19101 bool done_p = false;
19102
19103 /* Allow subreg of memory before/during reload. */
19104 bool memory_p = (MEM_P (x)
19105 || (!reload_completed && SUBREG_P (x)
19106 && MEM_P (SUBREG_REG (x))));
19107
19108 sri->icode = CODE_FOR_nothing;
19109 sri->t_icode = CODE_FOR_nothing;
19110 sri->extra_cost = 0;
19111 icode = ((in_p)
19112 ? reg_addr[mode].reload_load
19113 : reg_addr[mode].reload_store);
19114
19115 if (REG_P (x) || register_operand (x, mode))
19116 {
19117 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
19118 bool altivec_p = (rclass == ALTIVEC_REGS);
19119 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
19120
19121 if (!in_p)
19122 std::swap (to_type, from_type);
19123
19124 /* Can we do a direct move of some sort? */
19125 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
19126 altivec_p))
19127 {
19128 icode = (enum insn_code)sri->icode;
19129 default_p = false;
19130 done_p = true;
19131 ret = NO_REGS;
19132 }
19133 }
19134
19135 /* Make sure 0.0 is not reloaded or forced into memory. */
19136 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
19137 {
19138 ret = NO_REGS;
19139 default_p = false;
19140 done_p = true;
19141 }
19142
19143 /* If this is a scalar floating point value and we want to load it into the
19144 traditional Altivec registers, do it through a traditional floating
19145 point register first, unless we have D-form addressing. Also make sure that
19146 non-zero constants use a FPR. */
19147 if (!done_p && reg_addr[mode].scalar_in_vmx_p
19148 && !mode_supports_vmx_dform (mode)
19149 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19150 && (memory_p || CONST_DOUBLE_P (x)))
19151 {
19152 ret = FLOAT_REGS;
19153 default_p = false;
19154 done_p = true;
19155 }
19156
19157 /* Handle reload of load/stores if we have reload helper functions. */
19158 if (!done_p && icode != CODE_FOR_nothing && memory_p)
19159 {
19160 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
19161 mode);
19162
19163 if (extra_cost >= 0)
19164 {
19165 done_p = true;
19166 ret = NO_REGS;
19167 if (extra_cost > 0)
19168 {
19169 sri->extra_cost = extra_cost;
19170 sri->icode = icode;
19171 }
19172 }
19173 }
19174
19175 /* Handle unaligned loads and stores of integer registers. */
19176 if (!done_p && TARGET_POWERPC64
19177 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19178 && memory_p
19179 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
19180 {
19181 rtx addr = XEXP (x, 0);
19182 rtx off = address_offset (addr);
19183
19184 if (off != NULL_RTX)
19185 {
19186 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19187 unsigned HOST_WIDE_INT offset = INTVAL (off);
19188
19189 /* We need a secondary reload when our legitimate_address_p
19190 says the address is good (as otherwise the entire address
19191 will be reloaded), and the offset is not a multiple of
19192 four or we have an address wrap. Address wrap will only
19193 occur for LO_SUMs since legitimate_offset_address_p
19194 rejects addresses for 16-byte mems that will wrap. */
19195 if (GET_CODE (addr) == LO_SUM
19196 ? (1 /* legitimate_address_p allows any offset for lo_sum */
19197 && ((offset & 3) != 0
19198 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
19199 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
19200 && (offset & 3) != 0))
19201 {
19202 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
19203 if (in_p)
19204 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
19205 : CODE_FOR_reload_di_load);
19206 else
19207 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
19208 : CODE_FOR_reload_di_store);
19209 sri->extra_cost = 2;
19210 ret = NO_REGS;
19211 done_p = true;
19212 }
19213 else
19214 default_p = true;
19215 }
19216 else
19217 default_p = true;
19218 }
19219
19220 if (!done_p && !TARGET_POWERPC64
19221 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19222 && memory_p
19223 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
19224 {
19225 rtx addr = XEXP (x, 0);
19226 rtx off = address_offset (addr);
19227
19228 if (off != NULL_RTX)
19229 {
19230 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19231 unsigned HOST_WIDE_INT offset = INTVAL (off);
19232
19233 /* We need a secondary reload when our legitimate_address_p
19234 says the address is good (as otherwise the entire address
19235 will be reloaded), and we have a wrap.
19236
19237 legitimate_lo_sum_address_p allows LO_SUM addresses to
19238 have any offset so test for wrap in the low 16 bits.
19239
19240 legitimate_offset_address_p checks for the range
19241 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
19242 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
19243 [0x7ff4,0x7fff] respectively, so test for the
19244 intersection of these ranges, [0x7ffc,0x7fff] and
19245 [0x7ff4,0x7ff7] respectively.
19246
19247 Note that the address we see here may have been
19248 manipulated by legitimize_reload_address. */
19249 if (GET_CODE (addr) == LO_SUM
19250 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
19251 : offset - (0x8000 - extra) < UNITS_PER_WORD)
19252 {
19253 if (in_p)
19254 sri->icode = CODE_FOR_reload_si_load;
19255 else
19256 sri->icode = CODE_FOR_reload_si_store;
19257 sri->extra_cost = 2;
19258 ret = NO_REGS;
19259 done_p = true;
19260 }
19261 else
19262 default_p = true;
19263 }
19264 else
19265 default_p = true;
19266 }
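  /* Wrap example (numbers invented for illustration): a 16-byte access at
     (lo_sum ... (const_int 0x7ff8)) on a 32-bit target has extra = 12 and
     (offset & 0xffff) ^ 0x8000 == 0xfff8 >= 0x10000 - 12, i.e. the last
     words of the access would wrap past the 16-bit displacement, so a
     secondary reload using CODE_FOR_reload_si_load/store is requested.  */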
19267
19268 if (!done_p)
19269 default_p = true;
19270
19271 if (default_p)
19272 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
19273
19274 gcc_assert (ret != ALL_REGS);
19275
19276 if (TARGET_DEBUG_ADDR)
19277 {
19278 fprintf (stderr,
19279 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
19280 "mode = %s",
19281 reg_class_names[ret],
19282 in_p ? "true" : "false",
19283 reg_class_names[rclass],
19284 GET_MODE_NAME (mode));
19285
19286 if (reload_completed)
19287 fputs (", after reload", stderr);
19288
19289 if (!done_p)
19290 fputs (", done_p not set", stderr);
19291
19292 if (default_p)
19293 fputs (", default secondary reload", stderr);
19294
19295 if (sri->icode != CODE_FOR_nothing)
19296 fprintf (stderr, ", reload func = %s, extra cost = %d",
19297 insn_data[sri->icode].name, sri->extra_cost);
19298
19299 else if (sri->extra_cost > 0)
19300 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
19301
19302 fputs ("\n", stderr);
19303 debug_rtx (x);
19304 }
19305
19306 return ret;
19307 }
19308
19309 /* Better tracing for rs6000_secondary_reload_inner. */
19310
19311 static void
19312 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
19313 bool store_p)
19314 {
19315 rtx set, clobber;
19316
19317 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
19318
19319 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
19320 store_p ? "store" : "load");
19321
19322 if (store_p)
19323 set = gen_rtx_SET (mem, reg);
19324 else
19325 set = gen_rtx_SET (reg, mem);
19326
19327 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
19328 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
19329 }
19330
19331 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
19332 ATTRIBUTE_NORETURN;
19333
19334 static void
19335 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
19336 bool store_p)
19337 {
19338 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
19339 gcc_unreachable ();
19340 }
19341
19342 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
19343 reload helper functions. These were identified in
19344 rs6000_secondary_reload_memory, and if reload decided to use the secondary
19345 reload, it calls the insns:
19346 reload_<RELOAD:mode>_<P:mptrsize>_store
19347 reload_<RELOAD:mode>_<P:mptrsize>_load
19348
19349 which in turn calls this function, to do whatever is necessary to create
19350 valid addresses. */
19351
19352 void
19353 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
19354 {
19355 int regno = true_regnum (reg);
19356 machine_mode mode = GET_MODE (reg);
19357 addr_mask_type addr_mask;
19358 rtx addr;
19359 rtx new_addr;
19360 rtx op_reg, op0, op1;
19361 rtx and_op;
19362 rtx cc_clobber;
19363 rtvec rv;
19364
19365 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
19366 || !base_reg_operand (scratch, GET_MODE (scratch)))
19367 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19368
19369 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
19370 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19371
19372 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
19373 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19374
19375 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
19376 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19377
19378 else
19379 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19380
19381 /* Make sure the mode is valid in this register class. */
19382 if ((addr_mask & RELOAD_REG_VALID) == 0)
19383 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19384
19385 if (TARGET_DEBUG_ADDR)
19386 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
19387
19388 new_addr = addr = XEXP (mem, 0);
19389 switch (GET_CODE (addr))
19390 {
19391 /* Does the register class support auto update forms for this mode? If
19392 not, do the update now. We don't need a scratch register, since the
19393 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
19394 case PRE_INC:
19395 case PRE_DEC:
19396 op_reg = XEXP (addr, 0);
19397 if (!base_reg_operand (op_reg, Pmode))
19398 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19399
19400 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19401 {
19402 int delta = GET_MODE_SIZE (mode);
19403 if (GET_CODE (addr) == PRE_DEC)
19404 delta = -delta;
19405 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
19406 new_addr = op_reg;
19407 }
19408 break;
19409
19410 case PRE_MODIFY:
19411 op0 = XEXP (addr, 0);
19412 op1 = XEXP (addr, 1);
19413 if (!base_reg_operand (op0, Pmode)
19414 || GET_CODE (op1) != PLUS
19415 || !rtx_equal_p (op0, XEXP (op1, 0)))
19416 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19417
19418 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19419 {
19420 emit_insn (gen_rtx_SET (op0, op1));
19421 new_addr = reg;
19422 }
19423 break;
19424
19425 /* Do we need to simulate AND -16 to clear the bottom address bits used
19426 in VMX load/stores? */
19427 case AND:
19428 op0 = XEXP (addr, 0);
19429 op1 = XEXP (addr, 1);
19430 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
19431 {
19432 if (REG_P (op0) || SUBREG_P (op0))
19433 op_reg = op0;
19434
19435 else if (GET_CODE (op1) == PLUS)
19436 {
19437 emit_insn (gen_rtx_SET (scratch, op1));
19438 op_reg = scratch;
19439 }
19440
19441 else
19442 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19443
19444 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
19445 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
19446 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
19447 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
19448 new_addr = scratch;
19449 }
19450 break;
19451
19452 /* If this is an indirect address, make sure it is a base register. */
19453 case REG:
19454 case SUBREG:
19455 if (!base_reg_operand (addr, GET_MODE (addr)))
19456 {
19457 emit_insn (gen_rtx_SET (scratch, addr));
19458 new_addr = scratch;
19459 }
19460 break;
19461
19462 /* If this is an indexed address, make sure the register class can handle
19463 indexed addresses for this mode. */
19464 case PLUS:
19465 op0 = XEXP (addr, 0);
19466 op1 = XEXP (addr, 1);
19467 if (!base_reg_operand (op0, Pmode))
19468 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19469
19470 else if (int_reg_operand (op1, Pmode))
19471 {
19472 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19473 {
19474 emit_insn (gen_rtx_SET (scratch, addr));
19475 new_addr = scratch;
19476 }
19477 }
19478
19479 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
19480 {
19481 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
19482 || !quad_address_p (addr, mode, false))
19483 {
19484 emit_insn (gen_rtx_SET (scratch, addr));
19485 new_addr = scratch;
19486 }
19487 }
19488
19489 /* Make sure the register class can handle offset addresses. */
19490 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19491 {
19492 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19493 {
19494 emit_insn (gen_rtx_SET (scratch, addr));
19495 new_addr = scratch;
19496 }
19497 }
19498
19499 else
19500 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19501
19502 break;
19503
19504 case LO_SUM:
19505 op0 = XEXP (addr, 0);
19506 op1 = XEXP (addr, 1);
19507 if (!base_reg_operand (op0, Pmode))
19508 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19509
19510 else if (int_reg_operand (op1, Pmode))
19511 {
19512 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19513 {
19514 emit_insn (gen_rtx_SET (scratch, addr));
19515 new_addr = scratch;
19516 }
19517 }
19518
19519 /* Quad offsets are restricted and can't handle normal addresses. */
19520 else if (mode_supports_dq_form (mode))
19521 {
19522 emit_insn (gen_rtx_SET (scratch, addr));
19523 new_addr = scratch;
19524 }
19525
19526 /* Make sure the register class can handle offset addresses. */
19527 else if (legitimate_lo_sum_address_p (mode, addr, false))
19528 {
19529 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19530 {
19531 emit_insn (gen_rtx_SET (scratch, addr));
19532 new_addr = scratch;
19533 }
19534 }
19535
19536 else
19537 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19538
19539 break;
19540
19541 case SYMBOL_REF:
19542 case CONST:
19543 case LABEL_REF:
19544 rs6000_emit_move (scratch, addr, Pmode);
19545 new_addr = scratch;
19546 break;
19547
19548 default:
19549 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19550 }
19551
19552 /* Adjust the address if it changed. */
19553 if (addr != new_addr)
19554 {
19555 mem = replace_equiv_address_nv (mem, new_addr);
19556 if (TARGET_DEBUG_ADDR)
19557 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
19558 }
19559
19560 /* Now create the move. */
19561 if (store_p)
19562 emit_insn (gen_rtx_SET (mem, reg));
19563 else
19564 emit_insn (gen_rtx_SET (reg, mem));
19565
19566 return;
19567 }
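/* Illustrative example of the AND case above: for the Altivec-style
   address (and:DI (reg:DI 3) (const_int -16)) in a register class without
   RELOAD_REG_AND_M16, the masking is emitted explicitly, roughly

	rldicr 11,3,0,59	# scratch = r3 & -16

   (scratch register invented for the example), and the load or store then
   goes through the scratch register directly.  */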
19568
19569 /* Convert reloads involving 64-bit gprs and misaligned offset
19570 addressing, or multiple 32-bit gprs and offsets that are too large,
19571 to use indirect addressing. */
19572
19573 void
19574 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
19575 {
19576 int regno = true_regnum (reg);
19577 enum reg_class rclass;
19578 rtx addr;
19579 rtx scratch_or_premodify = scratch;
19580
19581 if (TARGET_DEBUG_ADDR)
19582 {
19583 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
19584 store_p ? "store" : "load");
19585 fprintf (stderr, "reg:\n");
19586 debug_rtx (reg);
19587 fprintf (stderr, "mem:\n");
19588 debug_rtx (mem);
19589 fprintf (stderr, "scratch:\n");
19590 debug_rtx (scratch);
19591 }
19592
19593 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
19594 gcc_assert (MEM_P (mem));
19595 rclass = REGNO_REG_CLASS (regno);
19596 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
19597 addr = XEXP (mem, 0);
19598
19599 if (GET_CODE (addr) == PRE_MODIFY)
19600 {
19601 gcc_assert (REG_P (XEXP (addr, 0))
19602 && GET_CODE (XEXP (addr, 1)) == PLUS
19603 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
19604 scratch_or_premodify = XEXP (addr, 0);
19605 addr = XEXP (addr, 1);
19606 }
19607 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
19608
19609 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
19610
19611 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
19612
19613 /* Now create the move. */
19614 if (store_p)
19615 emit_insn (gen_rtx_SET (mem, reg));
19616 else
19617 emit_insn (gen_rtx_SET (reg, mem));
19618
19619 return;
19620 }
19621
19622 /* Given an rtx X being reloaded into a reg required to be
19623 in class CLASS, return the class of reg to actually use.
19624 In general this is just CLASS; but on some machines
19625 in some cases it is preferable to use a more restrictive class.
19626
19627 On the RS/6000, we have to return NO_REGS when we want to reload a
19628 floating-point CONST_DOUBLE to force it to be copied to memory.
19629
19630 We also don't want to reload integer values into floating-point
19631 registers if we can at all help it. In fact, this can
19632 cause reload to die, if it tries to generate a reload of CTR
19633 into a FP register and discovers it doesn't have the memory location
19634 required.
19635
19636 ??? Would it be a good idea to have reload do the converse, that is
19637 try to reload floating modes into FP registers if possible?
19638 */
19639
19640 static enum reg_class
19641 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
19642 {
19643 machine_mode mode = GET_MODE (x);
19644 bool is_constant = CONSTANT_P (x);
19645
19646 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
19647 reload class for it. */
19648 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19649 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
19650 return NO_REGS;
19651
19652 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
19653 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
19654 return NO_REGS;
19655
19656 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
19657 the reloading of address expressions using PLUS into floating point
19658 registers. */
19659 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
19660 {
19661 if (is_constant)
19662 {
19663 /* Zero is always allowed in all VSX registers. */
19664 if (x == CONST0_RTX (mode))
19665 return rclass;
19666
19667 /* If this is a vector constant that can be formed with a few Altivec
19668 instructions, we want altivec registers. */
19669 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
19670 return ALTIVEC_REGS;
19671
19672 /* If this is an integer constant that can easily be loaded into
19673 vector registers, allow it. */
19674 if (CONST_INT_P (x))
19675 {
19676 HOST_WIDE_INT value = INTVAL (x);
19677
19678 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
19679 2.06 can generate it in the Altivec registers with
19680 VSPLTI<x>. */
19681 if (value == -1)
19682 {
19683 if (TARGET_P8_VECTOR)
19684 return rclass;
19685 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19686 return ALTIVEC_REGS;
19687 else
19688 return NO_REGS;
19689 }
19690
19691 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
19692 a sign extend in the Altivec registers. */
19693 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
19694 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
19695 return ALTIVEC_REGS;
19696 }
19697
19698 /* Force constant to memory. */
19699 return NO_REGS;
19700 }
19701
19702 /* D-form addressing can easily reload the value. */
19703 if (mode_supports_vmx_dform (mode)
19704 || mode_supports_dq_form (mode))
19705 return rclass;
19706
19707 /* If this is a scalar floating point value and we don't have D-form
19708 addressing, prefer the traditional floating point registers so that we
19709 can use D-form (register+offset) addressing. */
19710 if (rclass == VSX_REGS
19711 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
19712 return FLOAT_REGS;
19713
19714 /* Prefer the Altivec registers if Altivec is handling the vector
19715 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
19716 loads. */
19717 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
19718 || mode == V1TImode)
19719 return ALTIVEC_REGS;
19720
19721 return rclass;
19722 }
19723
19724 if (is_constant || GET_CODE (x) == PLUS)
19725 {
19726 if (reg_class_subset_p (GENERAL_REGS, rclass))
19727 return GENERAL_REGS;
19728 if (reg_class_subset_p (BASE_REGS, rclass))
19729 return BASE_REGS;
19730 return NO_REGS;
19731 }
19732
19733 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
19734 return GENERAL_REGS;
19735
19736 return rclass;
19737 }
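/* Example of the constant handling above: reloading (const_int -1) into
   VSX_REGS returns VSX_REGS on an ISA 2.07 target, where XXLORC can form
   -1 in any VSX register, but narrows to ALTIVEC_REGS on earlier VSX
   targets so that VSPLTISW can be used; integer constants outside the
   XXSPLTIB range are forced to memory via NO_REGS.  */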
19738
19739 /* Debug version of rs6000_preferred_reload_class. */
19740 static enum reg_class
19741 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
19742 {
19743 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
19744
19745 fprintf (stderr,
19746 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
19747 "mode = %s, x:\n",
19748 reg_class_names[ret], reg_class_names[rclass],
19749 GET_MODE_NAME (GET_MODE (x)));
19750 debug_rtx (x);
19751
19752 return ret;
19753 }
19754
19755 /* If we are copying between FP or AltiVec registers and anything else, we need
19756 a memory location. The exception is when we are targeting ppc64 and the
19757 fpr/gpr direct move instructions are available. Also, under VSX, you
19758 can copy vector registers from the FP register set to the Altivec register
19759 set and vice versa. */
19760
19761 static bool
19762 rs6000_secondary_memory_needed (machine_mode mode,
19763 reg_class_t from_class,
19764 reg_class_t to_class)
19765 {
19766 enum rs6000_reg_type from_type, to_type;
19767 bool altivec_p = ((from_class == ALTIVEC_REGS)
19768 || (to_class == ALTIVEC_REGS));
19769
19770 /* If a simple/direct move is available, we don't need secondary memory. */
19771 from_type = reg_class_to_reg_type[(int)from_class];
19772 to_type = reg_class_to_reg_type[(int)to_class];
19773
19774 if (rs6000_secondary_reload_move (to_type, from_type, mode,
19775 (secondary_reload_info *)0, altivec_p))
19776 return false;
19777
19778 /* If we have a floating point or vector register class, we need to use
19779 memory to transfer the data. */
19780 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
19781 return true;
19782
19783 return false;
19784 }
19785
19786 /* Debug version of rs6000_secondary_memory_needed. */
19787 static bool
19788 rs6000_debug_secondary_memory_needed (machine_mode mode,
19789 reg_class_t from_class,
19790 reg_class_t to_class)
19791 {
19792 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
19793
19794 fprintf (stderr,
19795 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
19796 "to_class = %s, mode = %s\n",
19797 ret ? "true" : "false",
19798 reg_class_names[from_class],
19799 reg_class_names[to_class],
19800 GET_MODE_NAME (mode));
19801
19802 return ret;
19803 }
19804
19805 /* Return the register class of a scratch register needed to copy IN into
19806 or out of a register in RCLASS in MODE. If it can be done directly,
19807 NO_REGS is returned. */
19808
19809 static enum reg_class
19810 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
19811 rtx in)
19812 {
19813 int regno;
19814
19815 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
19816 #if TARGET_MACHO
19817 && MACHOPIC_INDIRECT
19818 #endif
19819 ))
19820 {
19821 /* We cannot copy a symbolic operand directly into anything
19822 other than BASE_REGS for TARGET_ELF. So indicate that a
19823 register from BASE_REGS is needed as an intermediate
19824 register.
19825
19826 On Darwin, pic addresses require a load from memory, which
19827 needs a base register. */
19828 if (rclass != BASE_REGS
19829 && (SYMBOL_REF_P (in)
19830 || GET_CODE (in) == HIGH
19831 || GET_CODE (in) == LABEL_REF
19832 || GET_CODE (in) == CONST))
19833 return BASE_REGS;
19834 }
19835
19836 if (REG_P (in))
19837 {
19838 regno = REGNO (in);
19839 if (!HARD_REGISTER_NUM_P (regno))
19840 {
19841 regno = true_regnum (in);
19842 if (!HARD_REGISTER_NUM_P (regno))
19843 regno = -1;
19844 }
19845 }
19846 else if (SUBREG_P (in))
19847 {
19848 regno = true_regnum (in);
19849 if (!HARD_REGISTER_NUM_P (regno))
19850 regno = -1;
19851 }
19852 else
19853 regno = -1;
19854
19855 /* If we have VSX register moves, prefer moving scalar values between
19856 Altivec registers and GPRs by going via an FPR (and then via memory)
19857 instead of reloading the secondary memory address for Altivec moves. */
19858 if (TARGET_VSX
19859 && GET_MODE_SIZE (mode) < 16
19860 && !mode_supports_vmx_dform (mode)
19861 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
19862 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
19863 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19864 && (regno >= 0 && INT_REGNO_P (regno)))))
19865 return FLOAT_REGS;
19866
19867 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
19868 into anything. */
19869 if (rclass == GENERAL_REGS || rclass == BASE_REGS
19870 || (regno >= 0 && INT_REGNO_P (regno)))
19871 return NO_REGS;
19872
19873 /* Constants, memory, and VSX registers can go into VSX registers (both the
19874 traditional floating point and the altivec registers). */
19875 if (rclass == VSX_REGS
19876 && (regno == -1 || VSX_REGNO_P (regno)))
19877 return NO_REGS;
19878
19879 /* Constants, memory, and FP registers can go into FP registers. */
19880 if ((regno == -1 || FP_REGNO_P (regno))
19881 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
19882 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
19883
19884 /* Memory and AltiVec registers can go into AltiVec registers. */
19885 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
19886 && rclass == ALTIVEC_REGS)
19887 return NO_REGS;
19888
19889 /* We can copy among the CR registers. */
19890 if ((rclass == CR_REGS || rclass == CR0_REGS)
19891 && regno >= 0 && CR_REGNO_P (regno))
19892 return NO_REGS;
19893
19894 /* Otherwise, we need GENERAL_REGS. */
19895 return GENERAL_REGS;
19896 }
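/* For example, asking for a copy of CTR into FLOAT_REGS cannot be done
   directly, so GENERAL_REGS comes back as the scratch class; a copy
   between two FPRs, or between memory and an FPR (outside SDmode), needs
   no scratch register and returns NO_REGS.  */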
19897
19898 /* Debug version of rs6000_secondary_reload_class. */
19899 static enum reg_class
19900 rs6000_debug_secondary_reload_class (enum reg_class rclass,
19901 machine_mode mode, rtx in)
19902 {
19903 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
19904 fprintf (stderr,
19905 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
19906 "mode = %s, input rtx:\n",
19907 reg_class_names[ret], reg_class_names[rclass],
19908 GET_MODE_NAME (mode));
19909 debug_rtx (in);
19910
19911 return ret;
19912 }
19913
19914 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
19915
19916 static bool
19917 rs6000_can_change_mode_class (machine_mode from,
19918 machine_mode to,
19919 reg_class_t rclass)
19920 {
19921 unsigned from_size = GET_MODE_SIZE (from);
19922 unsigned to_size = GET_MODE_SIZE (to);
19923
19924 if (from_size != to_size)
19925 {
19926 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
19927
19928 if (reg_classes_intersect_p (xclass, rclass))
19929 {
19930 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
19931 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
19932 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
19933 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
19934
19935 /* Don't allow 64-bit types to overlap with 128-bit types that take a
19936 single register under VSX because the scalar part of the register
19937 is in the upper 64-bits, and not the lower 64-bits. Types like
19938 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
19939 IEEE floating point can't overlap, and neither can small
19940 values. */
19941
19942 if (to_float128_vector_p && from_float128_vector_p)
19943 return true;
19944
19945 else if (to_float128_vector_p || from_float128_vector_p)
19946 return false;
19947
19948 /* TDmode in floating-mode registers must always go into a register
19949 pair with the most significant word in the even-numbered register
19950 to match ISA requirements. In little-endian mode, this does not
19951 match subreg numbering, so we cannot allow subregs. */
19952 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
19953 return false;
19954
19955 if (from_size < 8 || to_size < 8)
19956 return false;
19957
19958 if (from_size == 8 && (8 * to_nregs) != to_size)
19959 return false;
19960
19961 if (to_size == 8 && (8 * from_nregs) != from_size)
19962 return false;
19963
19964 return true;
19965 }
19966 else
19967 return true;
19968 }
19969
19970 /* Since the VSX register set includes traditional floating point registers
19971 and altivec registers, just check for the size being different instead of
19972 trying to check whether the modes are vector modes. Otherwise it won't
19973 allow say DF and DI to change classes. For types like TFmode and TDmode
19974 that take 2 64-bit registers, rather than a single 128-bit register, don't
19975 allow subregs of those types to other 128-bit types. */
19976 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
19977 {
19978 unsigned num_regs = (from_size + 15) / 16;
19979 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
19980 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
19981 return false;
19982
19983 return (from_size == 8 || from_size == 16);
19984 }
19985
19986 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
19987 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
19988 return false;
19989
19990 return true;
19991 }
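/* Two consequences of the rules above, for illustration: DFmode <-> DImode
   subregs are fine in VSX registers (both values are 8 bytes), but
   DImode <-> TImode subregs are rejected, because the 64-bit scalar lives
   in the upper half of the 128-bit register and subreg numbering would not
   describe that layout.  */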
19992
19993 /* Debug version of rs6000_can_change_mode_class. */
19994 static bool
19995 rs6000_debug_can_change_mode_class (machine_mode from,
19996 machine_mode to,
19997 reg_class_t rclass)
19998 {
19999 bool ret = rs6000_can_change_mode_class (from, to, rclass);
20000
20001 fprintf (stderr,
20002 "rs6000_can_change_mode_class, return %s, from = %s, "
20003 "to = %s, rclass = %s\n",
20004 ret ? "true" : "false",
20005 GET_MODE_NAME (from), GET_MODE_NAME (to),
20006 reg_class_names[rclass]);
20007
20008 return ret;
20009 }
20010 \f
20011 /* Return a string to do a move operation of 128 bits of data. */
20012
20013 const char *
20014 rs6000_output_move_128bit (rtx operands[])
20015 {
20016 rtx dest = operands[0];
20017 rtx src = operands[1];
20018 machine_mode mode = GET_MODE (dest);
20019 int dest_regno;
20020 int src_regno;
20021 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20022 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
20023
20024 if (REG_P (dest))
20025 {
20026 dest_regno = REGNO (dest);
20027 dest_gpr_p = INT_REGNO_P (dest_regno);
20028 dest_fp_p = FP_REGNO_P (dest_regno);
20029 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20030 dest_vsx_p = dest_fp_p | dest_vmx_p;
20031 }
20032 else
20033 {
20034 dest_regno = -1;
20035 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
20036 }
20037
20038 if (REG_P (src))
20039 {
20040 src_regno = REGNO (src);
20041 src_gpr_p = INT_REGNO_P (src_regno);
20042 src_fp_p = FP_REGNO_P (src_regno);
20043 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20044 src_vsx_p = src_fp_p | src_vmx_p;
20045 }
20046 else
20047 {
20048 src_regno = -1;
20049 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
20050 }
20051
20052 /* Register moves. */
20053 if (dest_regno >= 0 && src_regno >= 0)
20054 {
20055 if (dest_gpr_p)
20056 {
20057 if (src_gpr_p)
20058 return "#";
20059
20060 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
20061 return (WORDS_BIG_ENDIAN
20062 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
20063 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
20064
20065 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
20066 return "#";
20067 }
20068
20069 else if (TARGET_VSX && dest_vsx_p)
20070 {
20071 if (src_vsx_p)
20072 return "xxlor %x0,%x1,%x1";
20073
20074 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
20075 return (WORDS_BIG_ENDIAN
20076 ? "mtvsrdd %x0,%1,%L1"
20077 : "mtvsrdd %x0,%L1,%1");
20078
20079 else if (TARGET_DIRECT_MOVE && src_gpr_p)
20080 return "#";
20081 }
20082
20083 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
20084 return "vor %0,%1,%1";
20085
20086 else if (dest_fp_p && src_fp_p)
20087 return "#";
20088 }
20089
20090 /* Loads. */
20091 else if (dest_regno >= 0 && MEM_P (src))
20092 {
20093 if (dest_gpr_p)
20094 {
20095 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20096 return "lq %0,%1";
20097 else
20098 return "#";
20099 }
20100
20101 else if (TARGET_ALTIVEC && dest_vmx_p
20102 && altivec_indexed_or_indirect_operand (src, mode))
20103 return "lvx %0,%y1";
20104
20105 else if (TARGET_VSX && dest_vsx_p)
20106 {
20107 if (mode_supports_dq_form (mode)
20108 && quad_address_p (XEXP (src, 0), mode, true))
20109 return "lxv %x0,%1";
20110
20111 else if (TARGET_P9_VECTOR)
20112 return "lxvx %x0,%y1";
20113
20114 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20115 return "lxvw4x %x0,%y1";
20116
20117 else
20118 return "lxvd2x %x0,%y1";
20119 }
20120
20121 else if (TARGET_ALTIVEC && dest_vmx_p)
20122 return "lvx %0,%y1";
20123
20124 else if (dest_fp_p)
20125 return "#";
20126 }
20127
20128 /* Stores. */
20129 else if (src_regno >= 0 && MEM_P (dest))
20130 {
20131 if (src_gpr_p)
20132 {
20133 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20134 return "stq %1,%0";
20135 else
20136 return "#";
20137 }
20138
20139 else if (TARGET_ALTIVEC && src_vmx_p
20140 && altivec_indexed_or_indirect_operand (dest, mode))
20141 return "stvx %1,%y0";
20142
20143 else if (TARGET_VSX && src_vsx_p)
20144 {
20145 if (mode_supports_dq_form (mode)
20146 && quad_address_p (XEXP (dest, 0), mode, true))
20147 return "stxv %x1,%0";
20148
20149 else if (TARGET_P9_VECTOR)
20150 return "stxvx %x1,%y0";
20151
20152 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20153 return "stxvw4x %x1,%y0";
20154
20155 else
20156 return "stxvd2x %x1,%y0";
20157 }
20158
20159 else if (TARGET_ALTIVEC && src_vmx_p)
20160 return "stvx %1,%y0";
20161
20162 else if (src_fp_p)
20163 return "#";
20164 }
20165
20166 /* Constants. */
20167 else if (dest_regno >= 0
20168 && (CONST_INT_P (src)
20169 || CONST_WIDE_INT_P (src)
20170 || CONST_DOUBLE_P (src)
20171 || GET_CODE (src) == CONST_VECTOR))
20172 {
20173 if (dest_gpr_p)
20174 return "#";
20175
20176 else if ((dest_vmx_p && TARGET_ALTIVEC)
20177 || (dest_vsx_p && TARGET_VSX))
20178 return output_vec_const_move (operands);
20179 }
20180
20181 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
20182 }
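/* Sample outputs (illustrative): a V2DImode move between two VSX registers
   prints "xxlor %x0,%x1,%x1"; a GPR <-> GPR 128-bit move prints "#", so
   the insn is split into two doubleword moves after reload; and a VSX
   vector load on a pre-power9 target prints "lxvd2x %x0,%y1".  */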
20183
20184 /* Validate a 128-bit move. */
20185 bool
20186 rs6000_move_128bit_ok_p (rtx operands[])
20187 {
20188 machine_mode mode = GET_MODE (operands[0]);
20189 return (gpc_reg_operand (operands[0], mode)
20190 || gpc_reg_operand (operands[1], mode));
20191 }
20192
20193 /* Return true if a 128-bit move needs to be split. */
20194 bool
20195 rs6000_split_128bit_ok_p (rtx operands[])
20196 {
20197 if (!reload_completed)
20198 return false;
20199
20200 if (!gpr_or_gpr_p (operands[0], operands[1]))
20201 return false;
20202
20203 if (quad_load_store_p (operands[0], operands[1]))
20204 return false;
20205
20206 return true;
20207 }
20208
20209 \f
20210 /* Given a comparison operation, return the bit number in CCR to test. We
20211 know this is a valid comparison.
20212
20213 SCC_P is 1 if this is for an scc. That means that %D will have been
20214 used instead of %C, so the bits will be in different places.
20215
20216 Return -1 if OP isn't a valid comparison for some reason. */
20217
20218 int
20219 ccr_bit (rtx op, int scc_p)
20220 {
20221 enum rtx_code code = GET_CODE (op);
20222 machine_mode cc_mode;
20223 int cc_regnum;
20224 int base_bit;
20225 rtx reg;
20226
20227 if (!COMPARISON_P (op))
20228 return -1;
20229
20230 reg = XEXP (op, 0);
20231
20232 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
20233 return -1;
20234
20235 cc_mode = GET_MODE (reg);
20236 cc_regnum = REGNO (reg);
20237 base_bit = 4 * (cc_regnum - CR0_REGNO);
20238
20239 validate_condition_mode (code, cc_mode);
20240
20241 /* When generating a sCOND operation, only positive conditions are
20242 allowed. */
20243 if (scc_p)
20244 switch (code)
20245 {
20246 case EQ:
20247 case GT:
20248 case LT:
20249 case UNORDERED:
20250 case GTU:
20251 case LTU:
20252 break;
20253 default:
20254 return -1;
20255 }
20256
20257 switch (code)
20258 {
20259 case NE:
20260 return scc_p ? base_bit + 3 : base_bit + 2;
20261 case EQ:
20262 return base_bit + 2;
20263 case GT: case GTU: case UNLE:
20264 return base_bit + 1;
20265 case LT: case LTU: case UNGE:
20266 return base_bit;
20267 case ORDERED: case UNORDERED:
20268 return base_bit + 3;
20269
20270 case GE: case GEU:
20271 /* If scc, we will have done a cror to put the bit in the
20272 unordered position. So test that bit. For integer, this is ! LT
20273 unless this is an scc insn. */
20274 return scc_p ? base_bit + 3 : base_bit;
20275
20276 case LE: case LEU:
20277 return scc_p ? base_bit + 3 : base_bit + 1;
20278
20279 default:
20280 return -1;
20281 }
20282 }
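/* Worked example: for (gt (reg:CC 69) (const_int 0)) with scc_p == 0,
   assuming the CR fields are hard registers 68-75 as on this port, the
   base bit is 4 * (69 - 68) = 4 and GT is bit 1 within the field, so
   ccr_bit returns 5.  */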
20283 \f
20284 /* Return the GOT register. */
20285
20286 rtx
20287 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
20288 {
20289 /* The second flow pass currently (June 1999) can't update
20290 regs_ever_live without disturbing other parts of the compiler, so
20291 update it here to make the prolog/epilogue code happy. */
20292 if (!can_create_pseudo_p ()
20293 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20294 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
20295
20296 crtl->uses_pic_offset_table = 1;
20297
20298 return pic_offset_table_rtx;
20299 }
20300 \f
20301 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
20302
20303 /* Write out a function code label. */
20304
20305 void
20306 rs6000_output_function_entry (FILE *file, const char *fname)
20307 {
20308 if (fname[0] != '.')
20309 {
20310 switch (DEFAULT_ABI)
20311 {
20312 default:
20313 gcc_unreachable ();
20314
20315 case ABI_AIX:
20316 if (DOT_SYMBOLS)
20317 putc ('.', file);
20318 else
20319 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
20320 break;
20321
20322 case ABI_ELFv2:
20323 case ABI_V4:
20324 case ABI_DARWIN:
20325 break;
20326 }
20327 }
20328
20329 RS6000_OUTPUT_BASENAME (file, fname);
20330 }
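
/* For example (illustrative): with DEFAULT_ABI == ABI_AIX and
   DOT_SYMBOLS, calling this for "foo" writes ".foo", the traditional
   entry-point symbol; without DOT_SYMBOLS it writes an internal label
   such as "L.foo"; on ELFv2, V4 and Darwin the basename is written
   unchanged.  */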
20331
20332 /* Print an operand. Recognize special options, documented below. */
20333
20334 #if TARGET_ELF
20335 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
20336 only introduced by the linker, when applying the sda21
20337 relocation. */
20338 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
20339 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
20340 #else
20341 #define SMALL_DATA_RELOC "sda21"
20342 #define SMALL_DATA_REG 0
20343 #endif
20344
20345 void
20346 print_operand (FILE *file, rtx x, int code)
20347 {
20348 int i;
20349 unsigned HOST_WIDE_INT uval;
20350
20351 switch (code)
20352 {
20353 /* %a is output_address. */
20354
20355 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
20356 output_operand. */
20357
20358 case 'D':
20359 /* Like 'J' but get to the GT bit only. */
20360 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20361 {
20362 output_operand_lossage ("invalid %%D value");
20363 return;
20364 }
20365
20366 /* Bit 1 is GT bit. */
20367 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
20368
20369 /* Add one for shift count in rlinm for scc. */
20370 fprintf (file, "%d", i + 1);
20371 return;
20372
20373 case 'e':
20374 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
20375 if (! INT_P (x))
20376 {
20377 output_operand_lossage ("invalid %%e value");
20378 return;
20379 }
20380
20381 uval = INTVAL (x);
20382 if ((uval & 0xffff) == 0 && uval != 0)
20383 putc ('s', file);
20384 return;
20385
20386 case 'E':
20387 /* X is a CR register.  Print the number of the EQ bit of the CR.  */
20388 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20389 output_operand_lossage ("invalid %%E value");
20390 else
20391 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
20392 return;
20393
20394 case 'f':
20395 /* X is a CR register. Print the shift count needed to move it
20396 to the high-order four bits. */
20397 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20398 output_operand_lossage ("invalid %%f value");
20399 else
20400 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
20401 return;
20402
20403 case 'F':
20404 /* Similar, but print the count for the rotate in the opposite
20405 direction. */
20406 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20407 output_operand_lossage ("invalid %%F value");
20408 else
20409 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
20410 return;
20411
20412 case 'G':
20413 /* X is a constant integer. If it is negative, print "m",
20414 otherwise print "z". This is to make an aze or ame insn. */
20415 if (!CONST_INT_P (x))
20416 output_operand_lossage ("invalid %%G value");
20417 else if (INTVAL (x) >= 0)
20418 putc ('z', file);
20419 else
20420 putc ('m', file);
20421 return;
20422
20423 case 'h':
20424 /* If constant, output low-order five bits. Otherwise, write
20425 normally. */
20426 if (INT_P (x))
20427 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
20428 else
20429 print_operand (file, x, 0);
20430 return;
20431
20432 case 'H':
20433 /* If constant, output low-order six bits. Otherwise, write
20434 normally. */
20435 if (INT_P (x))
20436 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
20437 else
20438 print_operand (file, x, 0);
20439 return;
20440
20441 case 'I':
20442 /* Print `i' if this is a constant, else nothing. */
20443 if (INT_P (x))
20444 putc ('i', file);
20445 return;
20446
20447 case 'j':
20448 /* Write the bit number in CCR for jump. */
20449 i = ccr_bit (x, 0);
20450 if (i == -1)
20451 output_operand_lossage ("invalid %%j code");
20452 else
20453 fprintf (file, "%d", i);
20454 return;
20455
20456 case 'J':
20457 /* Similar, but add one for shift count in rlinm for scc and pass
20458 scc flag to `ccr_bit'. */
20459 i = ccr_bit (x, 1);
20460 if (i == -1)
20461 output_operand_lossage ("invalid %%J code");
20462 else
20463 /* If we want bit 31, write a shift count of zero, not 32. */
20464 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20465 return;
20466
20467 case 'k':
20468 /* X must be a constant. Write the 1's complement of the
20469 constant. */
20470 if (! INT_P (x))
20471 output_operand_lossage ("invalid %%k value");
20472 else
20473 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
20474 return;
20475
20476 case 'K':
20477 /* X must be a symbolic constant on ELF. Write an
20478 expression suitable for an 'addi' that adds in the low 16
20479 bits of the MEM. */
20480 if (GET_CODE (x) == CONST)
20481 {
20482 if (GET_CODE (XEXP (x, 0)) != PLUS
20483 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
20484 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
20485 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20486 output_operand_lossage ("invalid %%K value");
20487 }
20488 print_operand_address (file, x);
20489 fputs ("@l", file);
20490 return;
20491
20492 /* %l is output_asm_label. */
20493
20494 case 'L':
20495 /* Write second word of DImode or DFmode reference. Works on register
20496 or non-indexed memory only. */
20497 if (REG_P (x))
20498 fputs (reg_names[REGNO (x) + 1], file);
20499 else if (MEM_P (x))
20500 {
20501 machine_mode mode = GET_MODE (x);
20502 /* Handle possible auto-increment. Since it is pre-increment and
20503 we have already done it, we can just use an offset of word. */
20504 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20505 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20506 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20507 UNITS_PER_WORD));
20508 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20509 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20510 UNITS_PER_WORD));
20511 else
20512 output_address (mode, XEXP (adjust_address_nv (x, SImode,
20513 UNITS_PER_WORD),
20514 0));
20515
20516 if (small_data_operand (x, GET_MODE (x)))
20517 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20518 reg_names[SMALL_DATA_REG]);
20519 }
20520 return;
20521
20522 case 'N': /* Unused */
20523 /* Write the number of elements in the vector times 4. */
20524 if (GET_CODE (x) != PARALLEL)
20525 output_operand_lossage ("invalid %%N value");
20526 else
20527 fprintf (file, "%d", XVECLEN (x, 0) * 4);
20528 return;
20529
20530 case 'O': /* Unused */
20531 /* Similar, but subtract 1 first. */
20532 if (GET_CODE (x) != PARALLEL)
20533 output_operand_lossage ("invalid %%O value");
20534 else
20535 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
20536 return;
20537
20538 case 'p':
20539 /* X is a CONST_INT that is a power of two. Output the logarithm. */
20540 if (! INT_P (x)
20541 || INTVAL (x) < 0
20542 || (i = exact_log2 (INTVAL (x))) < 0)
20543 output_operand_lossage ("invalid %%p value");
20544 else
20545 fprintf (file, "%d", i);
20546 return;
20547
20548 case 'P':
20549 /* The operand must be an indirect memory reference. The result
20550 is the register name. */
20551 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
20552 || REGNO (XEXP (x, 0)) >= 32)
20553 output_operand_lossage ("invalid %%P value");
20554 else
20555 fputs (reg_names[REGNO (XEXP (x, 0))], file);
20556 return;
20557
20558 case 'q':
20559 /* This outputs the logical code corresponding to a boolean
20560 expression. The expression may have one or both operands
20561 negated (if one, only the first one). For condition register
20562 logical operations, it will also treat the negated
20563 CR codes as NOTs, but not handle NOTs of them. */
20564 {
20565 const char *const *t = 0;
20566 const char *s;
20567 enum rtx_code code = GET_CODE (x);
20568 static const char * const tbl[3][3] = {
20569 { "and", "andc", "nor" },
20570 { "or", "orc", "nand" },
20571 { "xor", "eqv", "xor" } };
20572
20573 if (code == AND)
20574 t = tbl[0];
20575 else if (code == IOR)
20576 t = tbl[1];
20577 else if (code == XOR)
20578 t = tbl[2];
20579 else
20580 output_operand_lossage ("invalid %%q value");
20581
20582 if (GET_CODE (XEXP (x, 0)) != NOT)
20583 s = t[0];
20584 else
20585 {
20586 if (GET_CODE (XEXP (x, 1)) == NOT)
20587 s = t[2];
20588 else
20589 s = t[1];
20590 }
20591
20592 fputs (s, file);
20593 }
20594 return;
20595
20596 case 'Q':
20597 if (! TARGET_MFCRF)
20598 return;
20599 fputc (',', file);
20600 /* FALLTHRU */
20601
20602 case 'R':
20603 /* X is a CR register. Print the mask for `mtcrf'. */
20604 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20605 output_operand_lossage ("invalid %%R value");
20606 else
20607 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
20608 return;
20609
20610 case 's':
20611 /* Low 5 bits of 32 - value.  */
20612 if (! INT_P (x))
20613 output_operand_lossage ("invalid %%s value");
20614 else
20615 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
20616 return;
20617
20618 case 't':
20619 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
20620 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20621 {
20622 output_operand_lossage ("invalid %%t value");
20623 return;
20624 }
20625
20626 /* Bit 3 is OV bit. */
20627 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
20628
20629 /* If we want bit 31, write a shift count of zero, not 32. */
20630 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20631 return;
20632
20633 case 'T':
20634 /* Print the symbolic name of a branch target register. */
20635 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20636 x = XVECEXP (x, 0, 0);
20637 if (!REG_P (x) || (REGNO (x) != LR_REGNO
20638 && REGNO (x) != CTR_REGNO))
20639 output_operand_lossage ("invalid %%T value");
20640 else if (REGNO (x) == LR_REGNO)
20641 fputs ("lr", file);
20642 else
20643 fputs ("ctr", file);
20644 return;
20645
20646 case 'u':
20647 /* High-order or low-order 16 bits of constant, whichever is non-zero,
20648 for use in unsigned operand. */
20649 if (! INT_P (x))
20650 {
20651 output_operand_lossage ("invalid %%u value");
20652 return;
20653 }
20654
20655 uval = INTVAL (x);
20656 if ((uval & 0xffff) == 0)
20657 uval >>= 16;
20658
20659 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
20660 return;
20661
20662 case 'v':
20663 /* High-order 16 bits of constant for use in signed operand. */
20664 if (! INT_P (x))
20665 output_operand_lossage ("invalid %%v value");
20666 else
20667 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
20668 (INTVAL (x) >> 16) & 0xffff);
20669 return;
20670
20671 case 'U':
20672 /* Print `u' if this has an auto-increment or auto-decrement. */
20673 if (MEM_P (x)
20674 && (GET_CODE (XEXP (x, 0)) == PRE_INC
20675 || GET_CODE (XEXP (x, 0)) == PRE_DEC
20676 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
20677 putc ('u', file);
20678 return;
20679
20680 case 'V':
20681 /* Print the trap code for this operand. */
20682 switch (GET_CODE (x))
20683 {
20684 case EQ:
20685 fputs ("eq", file); /* 4 */
20686 break;
20687 case NE:
20688 fputs ("ne", file); /* 24 */
20689 break;
20690 case LT:
20691 fputs ("lt", file); /* 16 */
20692 break;
20693 case LE:
20694 fputs ("le", file); /* 20 */
20695 break;
20696 case GT:
20697 fputs ("gt", file); /* 8 */
20698 break;
20699 case GE:
20700 fputs ("ge", file); /* 12 */
20701 break;
20702 case LTU:
20703 fputs ("llt", file); /* 2 */
20704 break;
20705 case LEU:
20706 fputs ("lle", file); /* 6 */
20707 break;
20708 case GTU:
20709 fputs ("lgt", file); /* 1 */
20710 break;
20711 case GEU:
20712 fputs ("lge", file); /* 5 */
20713 break;
20714 default:
20715 output_operand_lossage ("invalid %%V value");
20716 }
20717 break;
20718
20719 case 'w':
20720 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
20721 normally. */
20722 if (INT_P (x))
20723 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
20724 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
20725 else
20726 print_operand (file, x, 0);
20727 return;
20728
20729 case 'x':
20730 /* X is a FPR or Altivec register used in a VSX context. */
20731 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
20732 output_operand_lossage ("invalid %%x value");
20733 else
20734 {
20735 int reg = REGNO (x);
20736 int vsx_reg = (FP_REGNO_P (reg)
20737 ? reg - 32
20738 : reg - FIRST_ALTIVEC_REGNO + 32);
20739
20740 #ifdef TARGET_REGNAMES
20741 if (TARGET_REGNAMES)
20742 fprintf (file, "%%vs%d", vsx_reg);
20743 else
20744 #endif
20745 fprintf (file, "%d", vsx_reg);
20746 }
20747 return;
20748
20749 case 'X':
20750 if (MEM_P (x)
20751 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
20752 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
20753 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
20754 putc ('x', file);
20755 return;
20756
20757 case 'Y':
20758 /* Like 'L', for third word of TImode/PTImode.  */
20759 if (REG_P (x))
20760 fputs (reg_names[REGNO (x) + 2], file);
20761 else if (MEM_P (x))
20762 {
20763 machine_mode mode = GET_MODE (x);
20764 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20765 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20766 output_address (mode, plus_constant (Pmode,
20767 XEXP (XEXP (x, 0), 0), 8));
20768 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20769 output_address (mode, plus_constant (Pmode,
20770 XEXP (XEXP (x, 0), 0), 8));
20771 else
20772 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
20773 if (small_data_operand (x, GET_MODE (x)))
20774 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20775 reg_names[SMALL_DATA_REG]);
20776 }
20777 return;
20778
20779 case 'z':
20780 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20781 x = XVECEXP (x, 0, 1);
20782 /* X is a SYMBOL_REF. Write out the name preceded by a
20783 period and without any trailing data in brackets. Used for function
20784 names. If we are configured for System V (or the embedded ABI) on
20785 the PowerPC, do not emit the period, since those systems do not use
20786 TOCs and the like. */
20787 if (!SYMBOL_REF_P (x))
20788 {
20789 output_operand_lossage ("invalid %%z value");
20790 return;
20791 }
20792
20793 /* For macho, check to see if we need a stub. */
20794 if (TARGET_MACHO)
20795 {
20796 const char *name = XSTR (x, 0);
20797 #if TARGET_MACHO
20798 if (darwin_picsymbol_stubs
20799 && MACHOPIC_INDIRECT
20800 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
20801 name = machopic_indirection_name (x, /*stub_p=*/true);
20802 #endif
20803 assemble_name (file, name);
20804 }
20805 else if (!DOT_SYMBOLS)
20806 assemble_name (file, XSTR (x, 0));
20807 else
20808 rs6000_output_function_entry (file, XSTR (x, 0));
20809 return;
20810
20811 case 'Z':
20812 /* Like 'L', for last word of TImode/PTImode. */
20813 if (REG_P (x))
20814 fputs (reg_names[REGNO (x) + 3], file);
20815 else if (MEM_P (x))
20816 {
20817 machine_mode mode = GET_MODE (x);
20818 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20819 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20820 output_address (mode, plus_constant (Pmode,
20821 XEXP (XEXP (x, 0), 0), 12));
20822 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20823 output_address (mode, plus_constant (Pmode,
20824 XEXP (XEXP (x, 0), 0), 12));
20825 else
20826 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
20827 if (small_data_operand (x, GET_MODE (x)))
20828 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20829 reg_names[SMALL_DATA_REG]);
20830 }
20831 return;
20832
20833 /* Print AltiVec memory operand. */
20834 case 'y':
20835 {
20836 rtx tmp;
20837
20838 gcc_assert (MEM_P (x));
20839
20840 tmp = XEXP (x, 0);
20841
20842 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
20843 && GET_CODE (tmp) == AND
20844 && CONST_INT_P (XEXP (tmp, 1))
20845 && INTVAL (XEXP (tmp, 1)) == -16)
20846 tmp = XEXP (tmp, 0);
20847 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
20848 && GET_CODE (tmp) == PRE_MODIFY)
20849 tmp = XEXP (tmp, 1);
20850 if (REG_P (tmp))
20851 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
20852 else
20853 {
20854 if (GET_CODE (tmp) != PLUS
20855 || !REG_P (XEXP (tmp, 0))
20856 || !REG_P (XEXP (tmp, 1)))
20857 {
20858 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
20859 break;
20860 }
20861
20862 if (REGNO (XEXP (tmp, 0)) == 0)
20863 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
20864 reg_names[ REGNO (XEXP (tmp, 0)) ]);
20865 else
20866 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
20867 reg_names[ REGNO (XEXP (tmp, 1)) ]);
20868 }
20869 break;
20870 }
20871
20872 case 0:
20873 if (REG_P (x))
20874 fprintf (file, "%s", reg_names[REGNO (x)]);
20875 else if (MEM_P (x))
20876 {
20877 /* We need to handle PRE_INC and PRE_DEC here, since we need to
20878 know the width from the mode. */
20879 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
20880 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
20881 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20882 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
20883 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
20884 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20885 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20886 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
20887 else
20888 output_address (GET_MODE (x), XEXP (x, 0));
20889 }
20890 else if (toc_relative_expr_p (x, false,
20891 &tocrel_base_oac, &tocrel_offset_oac))
20892 /* This hack along with a corresponding hack in
20893 rs6000_output_addr_const_extra arranges to output addends
20894 where the assembler expects to find them. eg.
20895 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
20896 without this hack would be output as "x@toc+4". We
20897 want "x+4@toc". */
20898 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
20899 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
20900 output_addr_const (file, XVECEXP (x, 0, 0));
20901 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20902 output_addr_const (file, XVECEXP (x, 0, 1));
20903 else
20904 output_addr_const (file, x);
20905 return;
20906
20907 case '&':
20908 if (const char *name = get_some_local_dynamic_name ())
20909 assemble_name (file, name);
20910 else
20911 output_operand_lossage ("'%%&' used without any "
20912 "local dynamic TLS references");
20913 return;
20914
20915 default:
20916 output_operand_lossage ("invalid %%xn code");
20917 }
20918 }
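
/* A few worked examples of the codes above (illustrative only):

     %e on 0x30000		prints "s" (low 16 bits zero, value nonzero)
     %u on 0x12340000		prints 0x1234 (high halfword selected)
     %p on 64			prints 6 (exact_log2)
     %q on (and (not X) Y)	prints "andc"
     %V on (ltu ...)		prints "llt"  */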
20919 \f
20920 /* Print the address of an operand. */
20921
20922 void
20923 print_operand_address (FILE *file, rtx x)
20924 {
20925 if (REG_P (x))
20926 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
20927
20928 /* Is it a pc-relative address? */
20929 else if (pcrel_address (x, Pmode))
20930 {
20931 HOST_WIDE_INT offset;
20932
20933 if (GET_CODE (x) == CONST)
20934 x = XEXP (x, 0);
20935
20936 if (GET_CODE (x) == PLUS)
20937 {
20938 offset = INTVAL (XEXP (x, 1));
20939 x = XEXP (x, 0);
20940 }
20941 else
20942 offset = 0;
20943
20944 output_addr_const (file, x);
20945
20946 if (offset)
20947 fprintf (file, "%+" PRId64, offset);
20948
20949 fputs ("@pcrel", file);
20950 }
20951 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
20952 || GET_CODE (x) == LABEL_REF)
20953 {
20954 output_addr_const (file, x);
20955 if (small_data_operand (x, GET_MODE (x)))
20956 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20957 reg_names[SMALL_DATA_REG]);
20958 else
20959 gcc_assert (!TARGET_TOC);
20960 }
20961 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20962 && REG_P (XEXP (x, 1)))
20963 {
20964 if (REGNO (XEXP (x, 0)) == 0)
20965 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
20966 reg_names[ REGNO (XEXP (x, 0)) ]);
20967 else
20968 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
20969 reg_names[ REGNO (XEXP (x, 1)) ]);
20970 }
20971 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20972 && CONST_INT_P (XEXP (x, 1)))
20973 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
20974 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
20975 #if TARGET_MACHO
20976 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20977 && CONSTANT_P (XEXP (x, 1)))
20978 {
20979 fprintf (file, "lo16(");
20980 output_addr_const (file, XEXP (x, 1));
20981 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20982 }
20983 #endif
20984 #if TARGET_ELF
20985 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20986 && CONSTANT_P (XEXP (x, 1)))
20987 {
20988 output_addr_const (file, XEXP (x, 1));
20989 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20990 }
20991 #endif
20992 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
20993 {
20994 /* This hack along with a corresponding hack in
20995 rs6000_output_addr_const_extra arranges to output addends
20996 where the assembler expects to find them. eg.
20997 (lo_sum (reg 9)
20998 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
20999 without this hack would be output as "x@toc+8@l(9)". We
21000 want "x+8@toc@l(9)". */
21001 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21002 if (GET_CODE (x) == LO_SUM)
21003 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
21004 else
21005 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
21006 }
21007 else
21008 output_addr_const (file, x);
21009 }
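
/* Worked examples of the address forms above (illustrative; register
   names assume the default numeric reg_names, not -mregnames):

     (reg 9)				"0(9)"
     (plus (reg 9) (reg 10))		"9,10"
     (plus (reg 9) (const_int 16))	"16(9)"
     (lo_sum (reg 9) (symbol_ref "x"))	"x@l(9)"  [ELF]
     pc-relative (symbol_ref "x")	"x@pcrel"  */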
21010 \f
21011 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
21012
21013 static bool
21014 rs6000_output_addr_const_extra (FILE *file, rtx x)
21015 {
21016 if (GET_CODE (x) == UNSPEC)
21017 switch (XINT (x, 1))
21018 {
21019 case UNSPEC_TOCREL:
21020 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
21021 && REG_P (XVECEXP (x, 0, 1))
21022 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21023 output_addr_const (file, XVECEXP (x, 0, 0));
21024 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
21025 {
21026 if (INTVAL (tocrel_offset_oac) >= 0)
21027 fprintf (file, "+");
21028 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
21029 }
21030 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
21031 {
21032 putc ('-', file);
21033 assemble_name (file, toc_label_name);
21034 need_toc_init = 1;
21035 }
21036 else if (TARGET_ELF)
21037 fputs ("@toc", file);
21038 return true;
21039
21040 #if TARGET_MACHO
21041 case UNSPEC_MACHOPIC_OFFSET:
21042 output_addr_const (file, XVECEXP (x, 0, 0));
21043 putc ('-', file);
21044 machopic_output_function_base_name (file);
21045 return true;
21046 #endif
21047 }
21048 return false;
21049 }
21050 \f
21051 /* Target hook for assembling integer objects. The PowerPC version has
21052 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21053 is defined. It also needs to handle DI-mode objects on 64-bit
21054 targets. */
21055
21056 static bool
21057 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21058 {
21059 #ifdef RELOCATABLE_NEEDS_FIXUP
21060 /* Special handling for SI values. */
21061 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
21062 {
21063 static int recurse = 0;
21064
21065 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21066 the .fixup section. Since the TOC section is already relocated, we
21067 don't need to mark it here. We used to skip the text section, but it
21068 should never be valid for relocated addresses to be placed in the text
21069 section. */
21070 if (DEFAULT_ABI == ABI_V4
21071 && (TARGET_RELOCATABLE || flag_pic > 1)
21072 && in_section != toc_section
21073 && !recurse
21074 && !CONST_SCALAR_INT_P (x)
21075 && CONSTANT_P (x))
21076 {
21077 char buf[256];
21078
21079 recurse = 1;
21080 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21081 fixuplabelno++;
21082 ASM_OUTPUT_LABEL (asm_out_file, buf);
21083 fprintf (asm_out_file, "\t.long\t(");
21084 output_addr_const (asm_out_file, x);
21085 fprintf (asm_out_file, ")@fixup\n");
21086 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21087 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21088 fprintf (asm_out_file, "\t.long\t");
21089 assemble_name (asm_out_file, buf);
21090 fprintf (asm_out_file, "\n\t.previous\n");
21091 recurse = 0;
21092 return true;
21093 }
21094 /* Remove initial .'s to turn a -mcall-aixdesc function
21095 address into the address of the descriptor, not the function
21096 itself. */
21097 else if (SYMBOL_REF_P (x)
21098 && XSTR (x, 0)[0] == '.'
21099 && DEFAULT_ABI == ABI_AIX)
21100 {
21101 const char *name = XSTR (x, 0);
21102 while (*name == '.')
21103 name++;
21104
21105 fprintf (asm_out_file, "\t.long\t%s\n", name);
21106 return true;
21107 }
21108 }
21109 #endif /* RELOCATABLE_NEEDS_FIXUP */
21110 return default_assemble_integer (x, size, aligned_p);
21111 }
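
/* For instance (illustrative): under -mrelocatable on V4, assembling a
   pointer-valued SImode initializer "&x" emits roughly

	.LCP0:
		.long	(x)@fixup
		.section ".fixup","aw"
		.align	2
		.long	.LCP0
		.previous

   so the loader can patch the word at .LCP0 when the image is moved.  */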
21112
21113 /* Return a template string for assembly to emit when making an
21114 external call. FUNOP is the call mem argument operand number. */
21115
21116 static const char *
21117 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
21118 {
21119 /* -Wformat-overflow workaround, without which gcc thinks that %u
21120 might produce 10 digits. */
21121 gcc_assert (funop <= MAX_RECOG_OPERANDS);
21122
21123 char arg[12];
21124 arg[0] = 0;
21125 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
21126 {
21127 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
21128 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
21129 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
21130 sprintf (arg, "(%%&@tlsld)");
21131 else
21132 gcc_unreachable ();
21133 }
21134
21135 /* The magic 32768 offset here corresponds to the offset of
21136 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
21137 char z[11];
21138 sprintf (z, "%%z%u%s", funop,
21139 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
21140 ? "+32768" : ""));
21141
21142 static char str[32]; /* 1 spare */
21143 if (rs6000_pcrel_p (cfun))
21144 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
21145 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21146 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
21147 sibcall ? "" : "\n\tnop");
21148 else if (DEFAULT_ABI == ABI_V4)
21149 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
21150 flag_pic ? "@plt" : "");
21151 #if TARGET_MACHO
21152 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
21153 else if (DEFAULT_ABI == ABI_DARWIN)
21154 {
21155 /* The cookie is in operand func+2. */
21156 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
21157 int cookie = INTVAL (operands[funop + 2]);
21158 if (cookie & CALL_LONG)
21159 {
21160 tree funname = get_identifier (XSTR (operands[funop], 0));
21161 tree labelname = get_prev_label (funname);
21162 gcc_checking_assert (labelname && !sibcall);
21163
21164 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
21165 instruction will reach 'foo', otherwise link as 'bl L42'".
21166 "L42" should be a 'branch island', that will do a far jump to
21167 'foo'. Branch islands are generated in
21168 macho_branch_islands(). */
21169 sprintf (str, "jbsr %%z%u,%.10s", funop,
21170 IDENTIFIER_POINTER (labelname));
21171 }
21172 else
21173 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
21174 after the call. */
21175 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
21176 }
21177 #endif
21178 else
21179 gcc_unreachable ();
21180 return str;
21181 }
21182
21183 const char *
21184 rs6000_call_template (rtx *operands, unsigned int funop)
21185 {
21186 return rs6000_call_template_1 (operands, funop, false);
21187 }
21188
21189 const char *
21190 rs6000_sibcall_template (rtx *operands, unsigned int funop)
21191 {
21192 return rs6000_call_template_1 (operands, funop, true);
21193 }
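
/* Putting the template together (illustrative examples only):

     ELFv2/AIX call		"bl foo\n\tnop"
     ELFv2/AIX sibcall		"b foo"
     V4 with -fPIC		"bl foo@plt"
     V4 secure PLT, -fPIC	"bl foo+32768@plt"
     pc-relative		"bl foo@notoc"

   A TLS marker operand, when present, appends a "(%n@tlsgd)" or
   "(%&@tlsld)" argument after the callee name.  */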
21194
21195 /* As above, for indirect calls. */
21196
21197 static const char *
21198 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
21199 bool sibcall)
21200 {
21201 /* -Wformat-overflow workaround, without which gcc thinks that %u
21202 might produce 10 digits. Note that -Wformat-overflow will not
21203 currently warn here for str[], so do not rely on a warning to
21204 ensure str[] is correctly sized. */
21205 gcc_assert (funop <= MAX_RECOG_OPERANDS);
21206
21207 /* Currently, funop is either 0 or 1. The maximum string is always
21208 a !speculate 64-bit __tls_get_addr call.
21209
21210 ABI_ELFv2, pcrel:
21211 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21212 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
21213 . 9 crset 2\n\t
21214 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21215 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
21216 . 8 beq%T1l-
21217 .---
21218 .142
21219
21220 ABI_AIX:
21221 . 9 ld 2,%3\n\t
21222 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21223 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
21224 . 9 crset 2\n\t
21225 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21226 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
21227 . 10 beq%T1l-\n\t
21228 . 10 ld 2,%4(1)
21229 .---
21230 .151
21231
21232 ABI_ELFv2:
21233 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21234 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
21235 . 9 crset 2\n\t
21236 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21237 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
21238 . 10 beq%T1l-\n\t
21239 . 10 ld 2,%3(1)
21240 .---
21241 .142
21242
21243 ABI_V4:
21244 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21245 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
21246 . 9 crset 2\n\t
21247 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21248 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
21249 . 8 beq%T1l-
21250 .---
21251 .141 */
21252 static char str[160]; /* 8 spare */
21253 char *s = str;
21254 const char *ptrload = TARGET_64BIT ? "d" : "wz";
21255
21256 if (DEFAULT_ABI == ABI_AIX)
21257 s += sprintf (s,
21258 "l%s 2,%%%u\n\t",
21259 ptrload, funop + 2);
21260
21261 /* We don't need the extra code to stop indirect call speculation if
21262 calling via LR. */
21263 bool speculate = (TARGET_MACHO
21264 || rs6000_speculate_indirect_jumps
21265 || (REG_P (operands[funop])
21266 && REGNO (operands[funop]) == LR_REGNO));
21267
21268 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
21269 {
21270 const char *rel64 = TARGET_64BIT ? "64" : "";
21271 char tls[29];
21272 tls[0] = 0;
21273 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
21274 {
21275 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
21276 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
21277 rel64, funop + 1);
21278 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
21279 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
21280 rel64);
21281 else
21282 gcc_unreachable ();
21283 }
21284
21285 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
21286 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
21287 && flag_pic == 2 ? "+32768" : "");
21288 if (!speculate)
21289 {
21290 s += sprintf (s,
21291 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
21292 tls, rel64, notoc, funop, addend);
21293 s += sprintf (s, "crset 2\n\t");
21294 }
21295 s += sprintf (s,
21296 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
21297 tls, rel64, notoc, funop, addend);
21298 }
21299 else if (!speculate)
21300 s += sprintf (s, "crset 2\n\t");
21301
21302 if (rs6000_pcrel_p (cfun))
21303 {
21304 if (speculate)
21305 sprintf (s, "b%%T%ul", funop);
21306 else
21307 sprintf (s, "beq%%T%ul-", funop);
21308 }
21309 else if (DEFAULT_ABI == ABI_AIX)
21310 {
21311 if (speculate)
21312 sprintf (s,
21313 "b%%T%ul\n\t"
21314 "l%s 2,%%%u(1)",
21315 funop, ptrload, funop + 3);
21316 else
21317 sprintf (s,
21318 "beq%%T%ul-\n\t"
21319 "l%s 2,%%%u(1)",
21320 funop, ptrload, funop + 3);
21321 }
21322 else if (DEFAULT_ABI == ABI_ELFv2)
21323 {
21324 if (speculate)
21325 sprintf (s,
21326 "b%%T%ul\n\t"
21327 "l%s 2,%%%u(1)",
21328 funop, ptrload, funop + 2);
21329 else
21330 sprintf (s,
21331 "beq%%T%ul-\n\t"
21332 "l%s 2,%%%u(1)",
21333 funop, ptrload, funop + 2);
21334 }
21335 else
21336 {
21337 if (speculate)
21338 sprintf (s,
21339 "b%%T%u%s",
21340 funop, sibcall ? "" : "l");
21341 else
21342 sprintf (s,
21343 "beq%%T%u%s-%s",
21344 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
21345 }
21346 return str;
21347 }
21348
21349 const char *
21350 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
21351 {
21352 return rs6000_indirect_call_template_1 (operands, funop, false);
21353 }
21354
21355 const char *
21356 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
21357 {
21358 return rs6000_indirect_call_template_1 (operands, funop, true);
21359 }
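
/* For example (illustrative): an ELFv2 indirect call through CTR with
   -mno-speculate-indirect-jumps expands along the lines of

	crset 2
	beqctrl-
	ld 2,24(1)

   where the crset/branch-hint pair forms the speculation barrier and
   the final load restores the TOC pointer from its stack save slot.  */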
21360
21361 #if HAVE_AS_PLTSEQ
21362 /* Output indirect call insns. WHICH identifies the type of sequence. */
21363 const char *
21364 rs6000_pltseq_template (rtx *operands, int which)
21365 {
21366 const char *rel64 = TARGET_64BIT ? "64" : "";
21367 char tls[30];
21368 tls[0] = 0;
21369 if (TARGET_TLS_MARKERS && GET_CODE (operands[3]) == UNSPEC)
21370 {
21371 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
21372 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
21373 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
21374 off, rel64);
21375 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
21376 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
21377 off, rel64);
21378 else
21379 gcc_unreachable ();
21380 }
21381
21382 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
21383 static char str[96]; /* 10 spare */
21384 char off = WORDS_BIG_ENDIAN ? '2' : '4';
21385 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
21386 && flag_pic == 2 ? "+32768" : "");
21387 switch (which)
21388 {
21389 case RS6000_PLTSEQ_TOCSAVE:
21390 sprintf (str,
21391 "st%s\n\t"
21392 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
21393 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
21394 tls, rel64);
21395 break;
21396 case RS6000_PLTSEQ_PLT16_HA:
21397 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
21398 sprintf (str,
21399 "lis %%0,0\n\t"
21400 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
21401 tls, off, rel64);
21402 else
21403 sprintf (str,
21404 "addis %%0,%%1,0\n\t"
21405 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
21406 tls, off, rel64, addend);
21407 break;
21408 case RS6000_PLTSEQ_PLT16_LO:
21409 sprintf (str,
21410 "l%s %%0,0(%%1)\n\t"
21411 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
21412 TARGET_64BIT ? "d" : "wz",
21413 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
21414 break;
21415 case RS6000_PLTSEQ_MTCTR:
21416 sprintf (str,
21417 "mtctr %%1\n\t"
21418 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
21419 tls, rel64, addend);
21420 break;
21421 case RS6000_PLTSEQ_PLT_PCREL34:
21422 sprintf (str,
21423 "pl%s %%0,0(0),1\n\t"
21424 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
21425 TARGET_64BIT ? "d" : "wz",
21426 tls, rel64);
21427 break;
21428 default:
21429 gcc_unreachable ();
21430 }
21431 return str;
21432 }
21433 #endif
21434
21435 /* Helper function to return whether a MODE can do prefixed loads/stores.
21436 VOIDmode is used when we are loading the pc-relative address into a base
21437 register, but we are not using it as part of a memory operation.  As
21438 prefixed memory support is added for more modes, they will be added here.  */
21439
21440 static bool
21441 mode_supports_prefixed_address_p (machine_mode mode)
21442 {
21443 return mode == VOIDmode;
21444 }
21445
21446 /* Function to return true if ADDR is a valid prefixed memory address that uses
21447 mode MODE. */
21448
21449 bool
21450 rs6000_prefixed_address (rtx addr, machine_mode mode)
21451 {
21452 if (!TARGET_PREFIXED_ADDR || !mode_supports_prefixed_address_p (mode))
21453 return false;
21454
21455 /* Check for PC-relative addresses. */
21456 if (pcrel_address (addr, Pmode))
21457 return true;
21458
21459 /* Check for prefixed memory addresses that have a large numeric offset,
21460 or an offset that can't be used for a DS/DQ-form memory operation. */
21461 if (GET_CODE (addr) == PLUS)
21462 {
21463 rtx op0 = XEXP (addr, 0);
21464 rtx op1 = XEXP (addr, 1);
21465
21466 if (!base_reg_operand (op0, Pmode) || !CONST_INT_P (op1))
21467 return false;
21468
21469 HOST_WIDE_INT value = INTVAL (op1);
21470 if (!SIGNED_34BIT_OFFSET_P (value, 0))
21471 return false;
21472
21473 /* Offset larger than 16-bits? */
21474 if (!SIGNED_16BIT_OFFSET_P (value, 0))
21475 return true;
21476
21477 /* DQ instruction (bottom 4 bits must be 0) for vectors. */
21478 HOST_WIDE_INT mask;
21479 if (GET_MODE_SIZE (mode) >= 16)
21480 mask = 15;
21481
21482 /* DS instruction (bottom 2 bits must be 0). For 32-bit integers, we
21483 need to use DS instructions if we are sign-extending the value with
21484 LWA. For 32-bit floating point, we need DS instructions to load and
21485 store values to the traditional Altivec registers. */
21486 else if (GET_MODE_SIZE (mode) >= 4)
21487 mask = 3;
21488
21489 /* QImode/HImode has no restrictions. */
21490 else
21491 return true;
21492
21493 /* Return true if we must use a prefixed instruction. */
21494 return (value & mask) != 0;
21495 }
21496
21497 return false;
21498 }
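
/* Worked examples of the offset checks above (illustrative only, and
   assuming the mode has been enabled in
   mode_supports_prefixed_address_p):

     DImode, offset 0x12345	true  (does not fit in 16 bits)
     DImode, offset 6		true  (DS form needs the low 2 bits clear)
     DImode, offset 8		false (a plain ld/std handles it)
     V2DImode, offset 24	true  (DQ form needs the low 4 bits clear)  */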
21499 \f
21500 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
21501 /* Emit an assembler directive to set symbol visibility for DECL to
21502 VISIBILITY_TYPE. */
21503
21504 static void
21505 rs6000_assemble_visibility (tree decl, int vis)
21506 {
21507 if (TARGET_XCOFF)
21508 return;
21509
21510 /* Functions need to have their entry point symbol visibility set as
21511 well as their descriptor symbol visibility. */
21512 if (DEFAULT_ABI == ABI_AIX
21513 && DOT_SYMBOLS
21514 && TREE_CODE (decl) == FUNCTION_DECL)
21515 {
21516 static const char * const visibility_types[] = {
21517 NULL, "protected", "hidden", "internal"
21518 };
21519
21520 const char *name, *type;
21521
21522 name = ((* targetm.strip_name_encoding)
21523 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
21524 type = visibility_types[vis];
21525
21526 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
21527 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
21528 }
21529 else
21530 default_assemble_visibility (decl, vis);
21531 }
21532 #endif
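
/* For example (illustrative): on a 64-bit ELF target using ABI_AIX with
   DOT_SYMBOLS, marking function "foo" hidden emits both

	.hidden foo
	.hidden .foo

   covering the descriptor symbol and the entry-point symbol.  */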
21533 \f
21534 enum rtx_code
21535 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
21536 {
21537 /* Reversing an FP compare needs care -- an ordered compare
21538 becomes an unordered compare and vice versa.  */
21539 if (mode == CCFPmode
21540 && (!flag_finite_math_only
21541 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
21542 || code == UNEQ || code == LTGT))
21543 return reverse_condition_maybe_unordered (code);
21544 else
21545 return reverse_condition (code);
21546 }
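
/* E.g. (illustrative): reversing GE in CCFPmode yields UNLT rather than
   LT, so a NaN operand still steers the branch to the correct arm; in
   CCmode the plain reversal to LT is used.  */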
21547
21548 /* Generate a compare for CODE. Return a brand-new rtx that
21549 represents the result of the compare. */
21550
21551 static rtx
21552 rs6000_generate_compare (rtx cmp, machine_mode mode)
21553 {
21554 machine_mode comp_mode;
21555 rtx compare_result;
21556 enum rtx_code code = GET_CODE (cmp);
21557 rtx op0 = XEXP (cmp, 0);
21558 rtx op1 = XEXP (cmp, 1);
21559
21560 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21561 comp_mode = CCmode;
21562 else if (FLOAT_MODE_P (mode))
21563 comp_mode = CCFPmode;
21564 else if (code == GTU || code == LTU
21565 || code == GEU || code == LEU)
21566 comp_mode = CCUNSmode;
21567 else if ((code == EQ || code == NE)
21568 && unsigned_reg_p (op0)
21569 && (unsigned_reg_p (op1)
21570 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
21571 /* These are unsigned values; perhaps there will be a later
21572 ordering compare that can be shared with this one.  */
21573 comp_mode = CCUNSmode;
21574 else
21575 comp_mode = CCmode;
21576
21577 /* If we have an unsigned compare, make sure we don't have a signed value as
21578 an immediate. */
21579 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
21580 && INTVAL (op1) < 0)
21581 {
21582 op0 = copy_rtx_if_shared (op0);
21583 op1 = force_reg (GET_MODE (op0), op1);
21584 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
21585 }
21586
21587 /* First, the compare. */
21588 compare_result = gen_reg_rtx (comp_mode);
21589
21590 /* IEEE 128-bit support in VSX registers when we do not have hardware
21591 support. */
21592 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21593 {
21594 rtx libfunc = NULL_RTX;
21595 bool check_nan = false;
21596 rtx dest;
21597
21598 switch (code)
21599 {
21600 case EQ:
21601 case NE:
21602 libfunc = optab_libfunc (eq_optab, mode);
21603 break;
21604
21605 case GT:
21606 case GE:
21607 libfunc = optab_libfunc (ge_optab, mode);
21608 break;
21609
21610 case LT:
21611 case LE:
21612 libfunc = optab_libfunc (le_optab, mode);
21613 break;
21614
21615 case UNORDERED:
21616 case ORDERED:
21617 libfunc = optab_libfunc (unord_optab, mode);
21618 code = (code == UNORDERED) ? NE : EQ;
21619 break;
21620
21621 case UNGE:
21622 case UNGT:
21623 check_nan = true;
21624 libfunc = optab_libfunc (ge_optab, mode);
21625 code = (code == UNGE) ? GE : GT;
21626 break;
21627
21628 case UNLE:
21629 case UNLT:
21630 check_nan = true;
21631 libfunc = optab_libfunc (le_optab, mode);
21632 code = (code == UNLE) ? LE : LT;
21633 break;
21634
21635 case UNEQ:
21636 case LTGT:
21637 check_nan = true;
21638 libfunc = optab_libfunc (eq_optab, mode);
21639 code = (code == UNEQ) ? EQ : NE;
21640 break;
21641
21642 default:
21643 gcc_unreachable ();
21644 }
21645
21646 gcc_assert (libfunc);
21647
21648 if (!check_nan)
21649 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21650 SImode, op0, mode, op1, mode);
21651
21652 /* The library signals an exception for signalling NaNs, so we need to
21653 handle isgreater, etc. by first checking isordered. */
21654 else
21655 {
21656 rtx ne_rtx, normal_dest, unord_dest;
21657 rtx unord_func = optab_libfunc (unord_optab, mode);
21658 rtx join_label = gen_label_rtx ();
21659 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
21660 rtx unord_cmp = gen_reg_rtx (comp_mode);
21661
21662
21663 /* Test for either value being a NaN. */
21664 gcc_assert (unord_func);
21665 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
21666 SImode, op0, mode, op1, mode);
21667
21668 /* Set the result to 1 (the unordered outcome) if either value is a
21669 NaN, and jump to the join label.  */
21670 dest = gen_reg_rtx (SImode);
21671 emit_move_insn (dest, const1_rtx);
21672 emit_insn (gen_rtx_SET (unord_cmp,
21673 gen_rtx_COMPARE (comp_mode, unord_dest,
21674 const0_rtx)));
21675
21676 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
21677 emit_jump_insn (gen_rtx_SET (pc_rtx,
21678 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
21679 join_ref,
21680 pc_rtx)));
21681
21682 /* Do the normal comparison, knowing that the values are not
21683 NaNs. */
21684 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21685 SImode, op0, mode, op1, mode);
21686
21687 emit_insn (gen_cstoresi4 (dest,
21688 gen_rtx_fmt_ee (code, SImode, normal_dest,
21689 const0_rtx),
21690 normal_dest, const0_rtx));
21691
21692 /* Join the NaN and non-NaN paths.  Compare dest against 0.  */
21693 emit_label (join_label);
21694 code = NE;
21695 }
21696
21697 emit_insn (gen_rtx_SET (compare_result,
21698 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
21699 }
21700
21701 else
21702 {
21703 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
21704 CLOBBERs to match cmptf_internal2 pattern. */
21705 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
21706 && FLOAT128_IBM_P (GET_MODE (op0))
21707 && TARGET_HARD_FLOAT)
21708 emit_insn (gen_rtx_PARALLEL (VOIDmode,
21709 gen_rtvec (10,
21710 gen_rtx_SET (compare_result,
21711 gen_rtx_COMPARE (comp_mode, op0, op1)),
21712 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21713 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21714 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21715 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21716 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21717 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21718 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21719 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21720 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
21721 else if (GET_CODE (op1) == UNSPEC
21722 && XINT (op1, 1) == UNSPEC_SP_TEST)
21723 {
21724 rtx op1b = XVECEXP (op1, 0, 0);
21725 comp_mode = CCEQmode;
21726 compare_result = gen_reg_rtx (CCEQmode);
21727 if (TARGET_64BIT)
21728 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
21729 else
21730 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
21731 }
21732 else
21733 emit_insn (gen_rtx_SET (compare_result,
21734 gen_rtx_COMPARE (comp_mode, op0, op1)));
21735 }
21736
21737 /* Some kinds of FP comparisons need an OR operation;
21738 under flag_finite_math_only we don't bother. */
21739 if (FLOAT_MODE_P (mode)
21740 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
21741 && !flag_finite_math_only
21742 && (code == LE || code == GE
21743 || code == UNEQ || code == LTGT
21744 || code == UNGT || code == UNLT))
21745 {
21746 enum rtx_code or1, or2;
21747 rtx or1_rtx, or2_rtx, compare2_rtx;
21748 rtx or_result = gen_reg_rtx (CCEQmode);
21749
21750 switch (code)
21751 {
21752 case LE: or1 = LT; or2 = EQ; break;
21753 case GE: or1 = GT; or2 = EQ; break;
21754 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
21755 case LTGT: or1 = LT; or2 = GT; break;
21756 case UNGT: or1 = UNORDERED; or2 = GT; break;
21757 case UNLT: or1 = UNORDERED; or2 = LT; break;
21758 default: gcc_unreachable ();
21759 }
21760 validate_condition_mode (or1, comp_mode);
21761 validate_condition_mode (or2, comp_mode);
21762 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
21763 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
21764 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
21765 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
21766 const_true_rtx);
21767 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
21768
21769 compare_result = or_result;
21770 code = EQ;
21771 }
21772
21773 validate_condition_mode (code, GET_MODE (compare_result));
21774
21775 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
21776 }
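
/* Two illustrative expansions of the above:

     unsigned a, b; a > b	a CCUNSmode compare; the result is
				(gtu (reg:CCUNS cc) (const_int 0))

     double x, y; x <= y	a CCFPmode compare plus the IOR of LT
				and EQ into a CCEQ register; the result
				is (eq (reg:CCEQ cc) (const_int 0))  */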
21777
21778 \f
21779 /* Return the diagnostic message string if the binary operation OP is
21780 not permitted on TYPE1 and TYPE2, NULL otherwise. */
21781
21782 static const char*
21783 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
21784 const_tree type1,
21785 const_tree type2)
21786 {
21787 machine_mode mode1 = TYPE_MODE (type1);
21788 machine_mode mode2 = TYPE_MODE (type2);
21789
21790 /* For complex modes, use the inner type. */
21791 if (COMPLEX_MODE_P (mode1))
21792 mode1 = GET_MODE_INNER (mode1);
21793
21794 if (COMPLEX_MODE_P (mode2))
21795 mode2 = GET_MODE_INNER (mode2);
21796
21797 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
21798 double to intermix unless -mfloat128-convert. */
21799 if (mode1 == mode2)
21800 return NULL;
21801
21802 if (!TARGET_FLOAT128_CVT)
21803 {
21804 if ((mode1 == KFmode && mode2 == IFmode)
21805 || (mode1 == IFmode && mode2 == KFmode))
21806 return N_("__float128 and __ibm128 cannot be used in the same "
21807 "expression");
21808
21809 if (TARGET_IEEEQUAD
21810 && ((mode1 == IFmode && mode2 == TFmode)
21811 || (mode1 == TFmode && mode2 == IFmode)))
21812 return N_("__ibm128 and long double cannot be used in the same "
21813 "expression");
21814
21815 if (!TARGET_IEEEQUAD
21816 && ((mode1 == KFmode && mode2 == TFmode)
21817 || (mode1 == TFmode && mode2 == KFmode)))
21818 return N_("__float128 and long double cannot be used in the same "
21819 "expression");
21820 }
21821
21822 return NULL;
21823 }
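
/* For instance (illustrative), without -mfloat128-convert:

	__float128 a;
	__ibm128 b;
	... a + b ...

   is rejected with "__float128 and __ibm128 cannot be used in the same
   expression".  */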
21824
21825 \f
21826 /* Expand floating point conversion to/from __float128 and __ibm128. */
21827
21828 void
21829 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
21830 {
21831 machine_mode dest_mode = GET_MODE (dest);
21832 machine_mode src_mode = GET_MODE (src);
21833 convert_optab cvt = unknown_optab;
21834 bool do_move = false;
21835 rtx libfunc = NULL_RTX;
21836 rtx dest2;
21837 typedef rtx (*rtx_2func_t) (rtx, rtx);
21838 rtx_2func_t hw_convert = (rtx_2func_t)0;
21839 size_t kf_or_tf;
21840
21841 struct hw_conv_t {
21842 rtx_2func_t from_df;
21843 rtx_2func_t from_sf;
21844 rtx_2func_t from_si_sign;
21845 rtx_2func_t from_si_uns;
21846 rtx_2func_t from_di_sign;
21847 rtx_2func_t from_di_uns;
21848 rtx_2func_t to_df;
21849 rtx_2func_t to_sf;
21850 rtx_2func_t to_si_sign;
21851 rtx_2func_t to_si_uns;
21852 rtx_2func_t to_di_sign;
21853 rtx_2func_t to_di_uns;
21854 } hw_conversions[2] = {
21855 /* Conversions to/from KFmode.  */
21856 {
21857 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
21858 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
21859 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
21860 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
21861 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
21862 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
21863 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
21864 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
21865 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
21866 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
21867 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
21868 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
21869 },
21870
21871 /* Conversions to/from TFmode.  */
21872 {
21873 gen_extenddftf2_hw, /* TFmode <- DFmode. */
21874 gen_extendsftf2_hw, /* TFmode <- SFmode. */
21875 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
21876 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
21877 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
21878 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
21879 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
21880 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
21881 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
21882 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
21883 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
21884 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
21885 },
21886 };
21887
21888 if (dest_mode == src_mode)
21889 gcc_unreachable ();
21890
21891 /* Eliminate memory operations. */
21892 if (MEM_P (src))
21893 src = force_reg (src_mode, src);
21894
21895 if (MEM_P (dest))
21896 {
21897 rtx tmp = gen_reg_rtx (dest_mode);
21898 rs6000_expand_float128_convert (tmp, src, unsigned_p);
21899 rs6000_emit_move (dest, tmp, dest_mode);
21900 return;
21901 }
21902
21903 /* Convert to IEEE 128-bit floating point. */
21904 if (FLOAT128_IEEE_P (dest_mode))
21905 {
21906 if (dest_mode == KFmode)
21907 kf_or_tf = 0;
21908 else if (dest_mode == TFmode)
21909 kf_or_tf = 1;
21910 else
21911 gcc_unreachable ();
21912
21913 switch (src_mode)
21914 {
21915 case E_DFmode:
21916 cvt = sext_optab;
21917 hw_convert = hw_conversions[kf_or_tf].from_df;
21918 break;
21919
21920 case E_SFmode:
21921 cvt = sext_optab;
21922 hw_convert = hw_conversions[kf_or_tf].from_sf;
21923 break;
21924
21925 case E_KFmode:
21926 case E_IFmode:
21927 case E_TFmode:
21928 if (FLOAT128_IBM_P (src_mode))
21929 cvt = sext_optab;
21930 else
21931 do_move = true;
21932 break;
21933
21934 case E_SImode:
21935 if (unsigned_p)
21936 {
21937 cvt = ufloat_optab;
21938 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
21939 }
21940 else
21941 {
21942 cvt = sfloat_optab;
21943 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
21944 }
21945 break;
21946
21947 case E_DImode:
21948 if (unsigned_p)
21949 {
21950 cvt = ufloat_optab;
21951 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
21952 }
21953 else
21954 {
21955 cvt = sfloat_optab;
21956 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
21957 }
21958 break;
21959
21960 default:
21961 gcc_unreachable ();
21962 }
21963 }
21964
21965 /* Convert from IEEE 128-bit floating point. */
21966 else if (FLOAT128_IEEE_P (src_mode))
21967 {
21968 if (src_mode == KFmode)
21969 kf_or_tf = 0;
21970 else if (src_mode == TFmode)
21971 kf_or_tf = 1;
21972 else
21973 gcc_unreachable ();
21974
21975 switch (dest_mode)
21976 {
21977 case E_DFmode:
21978 cvt = trunc_optab;
21979 hw_convert = hw_conversions[kf_or_tf].to_df;
21980 break;
21981
21982 case E_SFmode:
21983 cvt = trunc_optab;
21984 hw_convert = hw_conversions[kf_or_tf].to_sf;
21985 break;
21986
21987 case E_KFmode:
21988 case E_IFmode:
21989 case E_TFmode:
21990 if (FLOAT128_IBM_P (dest_mode))
21991 cvt = trunc_optab;
21992 else
21993 do_move = true;
21994 break;
21995
21996 case E_SImode:
21997 if (unsigned_p)
21998 {
21999 cvt = ufix_optab;
22000 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
22001 }
22002 else
22003 {
22004 cvt = sfix_optab;
22005 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
22006 }
22007 break;
22008
22009 case E_DImode:
22010 if (unsigned_p)
22011 {
22012 cvt = ufix_optab;
22013 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
22014 }
22015 else
22016 {
22017 cvt = sfix_optab;
22018 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
22019 }
22020 break;
22021
22022 default:
22023 gcc_unreachable ();
22024 }
22025 }
22026
22027 /* Both IBM format. */
22028 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
22029 do_move = true;
22030
22031 else
22032 gcc_unreachable ();
22033
22034 /* Handle conversion between TFmode/KFmode/IFmode. */
22035 if (do_move)
22036 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
22037
22038 /* Handle conversion if we have hardware support. */
22039 else if (TARGET_FLOAT128_HW && hw_convert)
22040 emit_insn ((hw_convert) (dest, src));
22041
22042 /* Call an external function to do the conversion. */
22043 else if (cvt != unknown_optab)
22044 {
22045 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
22046 gcc_assert (libfunc != NULL_RTX);
22047
22048 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
22049 src, src_mode);
22050
22051 gcc_assert (dest2 != NULL_RTX);
22052 if (!rtx_equal_p (dest, dest2))
22053 emit_move_insn (dest, dest2);
22054 }
22055
22056 else
22057 gcc_unreachable ();
22058
22059 return;
22060 }
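
/* For example (illustrative): a DFmode to KFmode conversion uses
   gen_extenddfkf2_hw when TARGET_FLOAT128_HW, and otherwise falls back
   to the sext_optab libcall (__extenddfkf2 under the usual soft-fp
   naming).  */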
22061
22062 \f
22063 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
22064 can be used as that dest register. Return the dest register. */
22065
22066 rtx
22067 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
22068 {
22069 if (op2 == const0_rtx)
22070 return op1;
22071
22072 if (GET_CODE (scratch) == SCRATCH)
22073 scratch = gen_reg_rtx (mode);
22074
22075 if (logical_operand (op2, mode))
22076 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
22077 else
22078 emit_insn (gen_rtx_SET (scratch,
22079 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
22080
22081 return scratch;
22082 }
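
/* E.g. (illustrative): for OP1 in r3 and OP2 == 0x20, logical_operand
   accepts the constant, so this emits an "xori scratch,3,0x20"; the
   scratch register is then zero exactly when r3 == 0x20 and can feed a
   subsequent compare against zero.  */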
22083
22084 void
22085 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
22086 {
22087 rtx condition_rtx;
22088 machine_mode op_mode;
22089 enum rtx_code cond_code;
22090 rtx result = operands[0];
22091
22092 condition_rtx = rs6000_generate_compare (operands[1], mode);
22093 cond_code = GET_CODE (condition_rtx);
22094
22095 if (cond_code == NE
22096 || cond_code == GE || cond_code == LE
22097 || cond_code == GEU || cond_code == LEU
22098 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
22099 {
22100 rtx not_result = gen_reg_rtx (CCEQmode);
22101 rtx not_op, rev_cond_rtx;
22102 machine_mode cc_mode;
22103
22104 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
22105
22106 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
22107 SImode, XEXP (condition_rtx, 0), const0_rtx);
22108 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
22109 emit_insn (gen_rtx_SET (not_result, not_op));
22110 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
22111 }
22112
22113 op_mode = GET_MODE (XEXP (operands[1], 0));
22114 if (op_mode == VOIDmode)
22115 op_mode = GET_MODE (XEXP (operands[1], 1));
22116
22117 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
22118 {
22119 PUT_MODE (condition_rtx, DImode);
22120 convert_move (result, condition_rtx, 0);
22121 }
22122 else
22123 {
22124 PUT_MODE (condition_rtx, SImode);
22125 emit_insn (gen_rtx_SET (result, condition_rtx));
22126 }
22127 }
22128
22129 /* Emit a branch of kind CODE to location LOC. */
22130
22131 void
22132 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
22133 {
22134 rtx condition_rtx, loc_ref;
22135
22136 condition_rtx = rs6000_generate_compare (operands[0], mode);
22137 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
22138 emit_jump_insn (gen_rtx_SET (pc_rtx,
22139 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
22140 loc_ref, pc_rtx)));
22141 }
22142
22143 /* Return the string to output a conditional branch to LABEL, which is
22144 the operand template of the label, or NULL if the branch is really a
22145 conditional return.
22146
22147 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
22148 condition code register and its mode specifies what kind of
22149 comparison we made.
22150
22151 REVERSED is nonzero if we should reverse the sense of the comparison.
22152
22153 INSN is the insn. */
22154
22155 char *
22156 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
22157 {
22158 static char string[64];
22159 enum rtx_code code = GET_CODE (op);
22160 rtx cc_reg = XEXP (op, 0);
22161 machine_mode mode = GET_MODE (cc_reg);
22162 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
22163 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
22164 int really_reversed = reversed ^ need_longbranch;
22165 char *s = string;
22166 const char *ccode;
22167 const char *pred;
22168 rtx note;
22169
22170 validate_condition_mode (code, mode);
22171
22172 /* Work out which way this really branches. We could use
22173 reverse_condition_maybe_unordered here always but this
22174 makes the resulting assembler clearer. */
22175 if (really_reversed)
22176 {
22177 /* Reversing an FP compare needs care -- an ordered compare
22178 becomes an unordered compare and vice versa.  */
22179 if (mode == CCFPmode)
22180 code = reverse_condition_maybe_unordered (code);
22181 else
22182 code = reverse_condition (code);
22183 }
22184
22185 switch (code)
22186 {
22187 /* Not all of these are actually distinct opcodes, but
22188 we distinguish them for clarity of the resulting assembler. */
22189 case NE: case LTGT:
22190 ccode = "ne"; break;
22191 case EQ: case UNEQ:
22192 ccode = "eq"; break;
22193 case GE: case GEU:
22194 ccode = "ge"; break;
22195 case GT: case GTU: case UNGT:
22196 ccode = "gt"; break;
22197 case LE: case LEU:
22198 ccode = "le"; break;
22199 case LT: case LTU: case UNLT:
22200 ccode = "lt"; break;
22201 case UNORDERED: ccode = "un"; break;
22202 case ORDERED: ccode = "nu"; break;
22203 case UNGE: ccode = "nl"; break;
22204 case UNLE: ccode = "ng"; break;
22205 default:
22206 gcc_unreachable ();
22207 }
22208
22209 /* Maybe we have a guess as to how likely the branch is. */
22210 pred = "";
22211 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
22212 if (note != NULL_RTX)
22213 {
22214 /* PROB is the difference from 50%. */
22215 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
22216 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
22217
22218 /* Only hint for highly probable/improbable branches on newer cpus when
22219 we have real profile data, as static prediction overrides processor
22220 dynamic prediction. For older cpus we may as well always hint, but
22221 assume not taken for branches that are very close to 50% as a
22222 mispredicted taken branch is more expensive than a
22223 mispredicted not-taken branch. */
22224 if (rs6000_always_hint
22225 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
22226 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
22227 && br_prob_note_reliable_p (note)))
22228 {
22229 if (abs (prob) > REG_BR_PROB_BASE / 20
22230 && ((prob > 0) ^ need_longbranch))
22231 pred = "+";
22232 else
22233 pred = "-";
22234 }
22235 }
22236
22237 if (label == NULL)
22238 s += sprintf (s, "b%slr%s ", ccode, pred);
22239 else
22240 s += sprintf (s, "b%s%s ", ccode, pred);
22241
22242 /* We need to escape any '%' characters in the reg_names string.
22243 Assume they'd only be the first character.... */
22244 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
22245 *s++ = '%';
22246 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
22247
22248 if (label != NULL)
22249 {
22250 /* If the branch distance was too far, we may have to use an
22251 unconditional branch to go the distance. */
22252 if (need_longbranch)
22253 s += sprintf (s, ",$+8\n\tb %s", label);
22254 else
22255 s += sprintf (s, ",%s", label);
22256 }
22257
22258 return string;
22259 }
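
/* For example, an EQ test in cr0 against an in-range label comes out as
   something like "beq cr0,L25" (the exact CR spelling comes from
   reg_names), with "+" or "-" appended to the mnemonic when a prediction
   hint is emitted. When the label is out of conditional-branch range the
   sense is reversed and we emit e.g. "bne cr0,$+8" followed by "b L25". */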
22260
22261 /* Return insn for VSX or Altivec comparisons. */
22262
22263 static rtx
22264 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
22265 {
22266 rtx mask;
22267 machine_mode mode = GET_MODE (op0);
22268
22269 switch (code)
22270 {
22271 default:
22272 break;
22273
22274 case GE:
22275 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22276 return NULL_RTX;
22277 /* FALLTHRU */
22278
22279 case EQ:
22280 case GT:
22281 case GTU:
22282 case ORDERED:
22283 case UNORDERED:
22284 case UNEQ:
22285 case LTGT:
22286 mask = gen_reg_rtx (mode);
22287 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
22288 return mask;
22289 }
22290
22291 return NULL_RTX;
22292 }
22293
22294 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
22295 DMODE is the expected destination mode. This is a recursive function. */
22296
22297 static rtx
22298 rs6000_emit_vector_compare (enum rtx_code rcode,
22299 rtx op0, rtx op1,
22300 machine_mode dmode)
22301 {
22302 rtx mask;
22303 bool swap_operands = false;
22304 bool try_again = false;
22305
22306 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22307 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22308
22309 /* See if the comparison works as is. */
22310 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22311 if (mask)
22312 return mask;
22313
22314 switch (rcode)
22315 {
22316 case LT:
22317 rcode = GT;
22318 swap_operands = true;
22319 try_again = true;
22320 break;
22321 case LTU:
22322 rcode = GTU;
22323 swap_operands = true;
22324 try_again = true;
22325 break;
22326 case NE:
22327 case UNLE:
22328 case UNLT:
22329 case UNGE:
22330 case UNGT:
22331 /* Invert condition and try again.
22332 e.g., A != B becomes ~(A==B). */
22333 {
22334 enum rtx_code rev_code;
22335 enum insn_code nor_code;
22336 rtx mask2;
22337
22338 rev_code = reverse_condition_maybe_unordered (rcode);
22339 if (rev_code == UNKNOWN)
22340 return NULL_RTX;
22341
22342 nor_code = optab_handler (one_cmpl_optab, dmode);
22343 if (nor_code == CODE_FOR_nothing)
22344 return NULL_RTX;
22345
22346 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22347 if (!mask2)
22348 return NULL_RTX;
22349
22350 mask = gen_reg_rtx (dmode);
22351 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22352 return mask;
22353 }
22354 break;
22355 case GE:
22356 case GEU:
22357 case LE:
22358 case LEU:
22359 /* Try GT/GTU/LT/LTU OR EQ */
22360 {
22361 rtx c_rtx, eq_rtx;
22362 enum insn_code ior_code;
22363 enum rtx_code new_code;
22364
22365 switch (rcode)
22366 {
22367 case GE:
22368 new_code = GT;
22369 break;
22370
22371 case GEU:
22372 new_code = GTU;
22373 break;
22374
22375 case LE:
22376 new_code = LT;
22377 break;
22378
22379 case LEU:
22380 new_code = LTU;
22381 break;
22382
22383 default:
22384 gcc_unreachable ();
22385 }
22386
22387 ior_code = optab_handler (ior_optab, dmode);
22388 if (ior_code == CODE_FOR_nothing)
22389 return NULL_RTX;
22390
22391 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
22392 if (!c_rtx)
22393 return NULL_RTX;
22394
22395 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
22396 if (!eq_rtx)
22397 return NULL_RTX;
22398
22399 mask = gen_reg_rtx (dmode);
22400 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
22401 return mask;
22402 }
22403 break;
22404 default:
22405 return NULL_RTX;
22406 }
22407
22408 if (try_again)
22409 {
22410 if (swap_operands)
22411 std::swap (op0, op1);
22412
22413 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22414 if (mask)
22415 return mask;
22416 }
22417
22418 /* You only get two chances. */
22419 return NULL_RTX;
22420 }
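
/* To illustrate the recursion above on an ISA whose direct vector compares
   are only EQ/GT/GTU: "a <= b" (LE) is built as (lt a b) | (eq a b), where
   the LT leg itself becomes (gt b a) via the operand swap, and "a != b"
   (NE) is built as ~(eq a b) through one_cmpl_optab. */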
22421
22422 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22423 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22424 operands for the relation operation COND. */
22425
22426 int
22427 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
22428 rtx cond, rtx cc_op0, rtx cc_op1)
22429 {
22430 machine_mode dest_mode = GET_MODE (dest);
22431 machine_mode mask_mode = GET_MODE (cc_op0);
22432 enum rtx_code rcode = GET_CODE (cond);
22433 machine_mode cc_mode = CCmode;
22434 rtx mask;
22435 rtx cond2;
22436 bool invert_move = false;
22437
22438 if (VECTOR_UNIT_NONE_P (dest_mode))
22439 return 0;
22440
22441 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
22442 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
22443
22444 switch (rcode)
22445 {
22446 /* Swap operands if we can, and fall back to doing the operation as
22447 specified, and doing a NOR to invert the test. */
22448 case NE:
22449 case UNLE:
22450 case UNLT:
22451 case UNGE:
22452 case UNGT:
22453 /* Invert condition and try again.
22454 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22455 invert_move = true;
22456 rcode = reverse_condition_maybe_unordered (rcode);
22457 if (rcode == UNKNOWN)
22458 return 0;
22459 break;
22460
22461 case GE:
22462 case LE:
22463 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
22464 {
22465 /* Invert condition to avoid compound test. */
22466 invert_move = true;
22467 rcode = reverse_condition (rcode);
22468 }
22469 break;
22470
22471 case GTU:
22472 case GEU:
22473 case LTU:
22474 case LEU:
22475 /* Mark unsigned tests with CCUNSmode. */
22476 cc_mode = CCUNSmode;
22477
22478 /* Invert condition to avoid compound test if necessary. */
22479 if (rcode == GEU || rcode == LEU)
22480 {
22481 invert_move = true;
22482 rcode = reverse_condition (rcode);
22483 }
22484 break;
22485
22486 default:
22487 break;
22488 }
22489
22490 /* Get the vector mask for the given relational operations. */
22491 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
22492
22493 if (!mask)
22494 return 0;
22495
22496 if (invert_move)
22497 std::swap (op_true, op_false);
22498
22499 /* The compare mask is known to be -1/0 in each element, so selects whose arms are the constants -1 and 0 can use the mask (or its complement) directly. */
22500 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
22501 && (GET_CODE (op_true) == CONST_VECTOR
22502 || GET_CODE (op_false) == CONST_VECTOR))
22503 {
22504 rtx constant_0 = CONST0_RTX (dest_mode);
22505 rtx constant_m1 = CONSTM1_RTX (dest_mode);
22506
22507 if (op_true == constant_m1 && op_false == constant_0)
22508 {
22509 emit_move_insn (dest, mask);
22510 return 1;
22511 }
22512
22513 else if (op_true == constant_0 && op_false == constant_m1)
22514 {
22515 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
22516 return 1;
22517 }
22518
22519 /* If we can't use the vector comparison directly, perhaps we can use
22520 the mask for the true or false fields, instead of loading up a
22521 constant. */
22522 if (op_true == constant_m1)
22523 op_true = mask;
22524
22525 if (op_false == constant_0)
22526 op_false = mask;
22527 }
22528
22529 if (!REG_P (op_true) && !SUBREG_P (op_true))
22530 op_true = force_reg (dest_mode, op_true);
22531
22532 if (!REG_P (op_false) && !SUBREG_P (op_false))
22533 op_false = force_reg (dest_mode, op_false);
22534
22535 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
22536 CONST0_RTX (dest_mode));
22537 emit_insn (gen_rtx_SET (dest,
22538 gen_rtx_IF_THEN_ELSE (dest_mode,
22539 cond2,
22540 op_true,
22541 op_false)));
22542 return 1;
22543 }
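
/* For example, A = (B > C) ? D : E on V4SI first computes
   mask = (gt:V4SI B C), yielding -1/0 in each element, and then emits
   (set A (if_then_else (ne mask 0) D E)), which matches the vsel/xxsel
   patterns in the machine description. */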
22544
22545 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
22546 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands of
22547 the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return
22548 0 if the hardware has no such operation. */
22549
22550 static int
22551 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22552 {
22553 enum rtx_code code = GET_CODE (op);
22554 rtx op0 = XEXP (op, 0);
22555 rtx op1 = XEXP (op, 1);
22556 machine_mode compare_mode = GET_MODE (op0);
22557 machine_mode result_mode = GET_MODE (dest);
22558 bool max_p = false;
22559
22560 if (result_mode != compare_mode)
22561 return 0;
22562
22563 if (code == GE || code == GT)
22564 max_p = true;
22565 else if (code == LE || code == LT)
22566 max_p = false;
22567 else
22568 return 0;
22569
22570 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
22571 ;
22572
22573 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
22574 max_p = !max_p;
22575
22576 else
22577 return 0;
22578
22579 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
22580 return 1;
22581 }
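
/* For example, "d = (a >= b) ? a : b" in DFmode satisfies the checks above
   and emits a single SMAX, which under TARGET_P9_MINMAX is expected to map
   to xsmaxcdp (xsmincdp when the arms are swapped, since that flips
   MAX_P). */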
22582
22583 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
22584 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
22585 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
22586 zero/false. Return 0 if the hardware has no such operation. */
22587
22588 static int
22589 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22590 {
22591 enum rtx_code code = GET_CODE (op);
22592 rtx op0 = XEXP (op, 0);
22593 rtx op1 = XEXP (op, 1);
22594 machine_mode result_mode = GET_MODE (dest);
22595 rtx compare_rtx;
22596 rtx cmove_rtx;
22597 rtx clobber_rtx;
22598
22599 if (!can_create_pseudo_p ())
22600 return 0;
22601
22602 switch (code)
22603 {
22604 case EQ:
22605 case GE:
22606 case GT:
22607 break;
22608
22609 case NE:
22610 case LT:
22611 case LE:
22612 code = swap_condition (code);
22613 std::swap (op0, op1);
22614 break;
22615
22616 default:
22617 return 0;
22618 }
22619
22620 /* Generate: [(parallel [(set (dest)
22621 (if_then_else (op (cmp1) (cmp2))
22622 (true)
22623 (false)))
22624 (clobber (scratch))])]. */
22625
22626 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
22627 cmove_rtx = gen_rtx_SET (dest,
22628 gen_rtx_IF_THEN_ELSE (result_mode,
22629 compare_rtx,
22630 true_cond,
22631 false_cond));
22632
22633 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
22634 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22635 gen_rtvec (2, cmove_rtx, clobber_rtx)));
22636
22637 return 1;
22638 }
22639
22640 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
22641 operands of the last comparison is nonzero/true, FALSE_COND if it
22642 is zero/false. Return 0 if the hardware has no such operation. */
22643
22644 int
22645 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22646 {
22647 enum rtx_code code = GET_CODE (op);
22648 rtx op0 = XEXP (op, 0);
22649 rtx op1 = XEXP (op, 1);
22650 machine_mode compare_mode = GET_MODE (op0);
22651 machine_mode result_mode = GET_MODE (dest);
22652 rtx temp;
22653 bool is_against_zero;
22654
22655 /* These modes should always match. */
22656 if (GET_MODE (op1) != compare_mode
22657 /* In the isel case however, we can use a compare immediate, so
22658 op1 may be a small constant. */
22659 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
22660 return 0;
22661 if (GET_MODE (true_cond) != result_mode)
22662 return 0;
22663 if (GET_MODE (false_cond) != result_mode)
22664 return 0;
22665
22666 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
22667 if (TARGET_P9_MINMAX
22668 && (compare_mode == SFmode || compare_mode == DFmode)
22669 && (result_mode == SFmode || result_mode == DFmode))
22670 {
22671 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
22672 return 1;
22673
22674 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
22675 return 1;
22676 }
22677
22678 /* Don't allow using floating point comparisons for integer results for
22679 now. */
22680 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
22681 return 0;
22682
22683 /* First, work out if the hardware can do this at all, or
22684 if it's too slow.... */
22685 if (!FLOAT_MODE_P (compare_mode))
22686 {
22687 if (TARGET_ISEL)
22688 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
22689 return 0;
22690 }
22691
22692 is_against_zero = op1 == CONST0_RTX (compare_mode);
22693
22694 /* A floating-point subtract might overflow, underflow, or produce
22695 an inexact result, thus changing the floating-point flags, so it
22696 can't be generated if we care about that. It's safe if one side
22697 of the construct is zero, since then no subtract will be
22698 generated. */
22699 if (SCALAR_FLOAT_MODE_P (compare_mode)
22700 && flag_trapping_math && ! is_against_zero)
22701 return 0;
22702
22703 /* Eliminate half of the comparisons by switching operands, this
22704 makes the remaining code simpler. */
22705 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
22706 || code == LTGT || code == LT || code == UNLE)
22707 {
22708 code = reverse_condition_maybe_unordered (code);
22709 std::swap (true_cond, false_cond);
22712 }
22713
22714 /* UNEQ and LTGT take four instructions for a comparison with zero,
22715 so it'll probably be faster to use a branch here too. */
22716 if (code == UNEQ && HONOR_NANS (compare_mode))
22717 return 0;
22718
22719 /* We're going to try to implement comparisons by performing
22720 a subtract, then comparing against zero. Unfortunately,
22721 Inf - Inf is NaN which is not zero, and so if we don't
22722 know that the operand is finite and the comparison
22723 would treat EQ differently from UNORDERED, we can't do it. */
22724 if (HONOR_INFINITIES (compare_mode)
22725 && code != GT && code != UNGE
22726 && (!CONST_DOUBLE_P (op1)
22727 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
22728 /* Constructs of the form (a OP b ? a : b) are safe. */
22729 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
22730 || (! rtx_equal_p (op0, true_cond)
22731 && ! rtx_equal_p (op1, true_cond))))
22732 return 0;
22733
22734 /* At this point we know we can use fsel. */
22735
22736 /* Reduce the comparison to a comparison against zero. */
22737 if (! is_against_zero)
22738 {
22739 temp = gen_reg_rtx (compare_mode);
22740 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
22741 op0 = temp;
22742 op1 = CONST0_RTX (compare_mode);
22743 }
22744
22745 /* If we don't care about NaNs we can reduce some of the comparisons
22746 down to faster ones. */
22747 if (! HONOR_NANS (compare_mode))
22748 switch (code)
22749 {
22750 case GT:
22751 code = LE;
22752 std::swap (true_cond, false_cond);
22755 break;
22756 case UNGE:
22757 code = GE;
22758 break;
22759 case UNEQ:
22760 code = EQ;
22761 break;
22762 default:
22763 break;
22764 }
22765
22766 /* Now, reduce everything down to a GE. */
22767 switch (code)
22768 {
22769 case GE:
22770 break;
22771
22772 case LE:
22773 temp = gen_reg_rtx (compare_mode);
22774 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22775 op0 = temp;
22776 break;
22777
22778 case ORDERED:
22779 temp = gen_reg_rtx (compare_mode);
22780 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
22781 op0 = temp;
22782 break;
22783
22784 case EQ:
22785 temp = gen_reg_rtx (compare_mode);
22786 emit_insn (gen_rtx_SET (temp,
22787 gen_rtx_NEG (compare_mode,
22788 gen_rtx_ABS (compare_mode, op0))));
22789 op0 = temp;
22790 break;
22791
22792 case UNGE:
22793 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
22794 temp = gen_reg_rtx (result_mode);
22795 emit_insn (gen_rtx_SET (temp,
22796 gen_rtx_IF_THEN_ELSE (result_mode,
22797 gen_rtx_GE (VOIDmode,
22798 op0, op1),
22799 true_cond, false_cond)));
22800 false_cond = true_cond;
22801 true_cond = temp;
22802
22803 temp = gen_reg_rtx (compare_mode);
22804 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22805 op0 = temp;
22806 break;
22807
22808 case GT:
22809 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
22810 temp = gen_reg_rtx (result_mode);
22811 emit_insn (gen_rtx_SET (temp,
22812 gen_rtx_IF_THEN_ELSE (result_mode,
22813 gen_rtx_GE (VOIDmode,
22814 op0, op1),
22815 true_cond, false_cond)));
22816 true_cond = false_cond;
22817 false_cond = temp;
22818
22819 temp = gen_reg_rtx (compare_mode);
22820 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22821 op0 = temp;
22822 break;
22823
22824 default:
22825 gcc_unreachable ();
22826 }
22827
22828 emit_insn (gen_rtx_SET (dest,
22829 gen_rtx_IF_THEN_ELSE (result_mode,
22830 gen_rtx_GE (VOIDmode,
22831 op0, op1),
22832 true_cond, false_cond)));
22833 return 1;
22834 }
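
/* A worked example of the reductions above, when NaNs are not honored:
   "d = (a <= b) ? x : y" is first rewritten as t = a - b (the compare
   against zero), the LE case then negates to t2 = b - a, and the final
   insn is the GE selection "d = (t2 >= 0) ? x : y", i.e. fsel d,t2,x,y. */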
22835
22836 /* Same as above, but for ints (isel). */
22837
22838 int
22839 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22840 {
22841 rtx condition_rtx, cr;
22842 machine_mode mode = GET_MODE (dest);
22843 enum rtx_code cond_code;
22844 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
22845 bool signedp;
22846
22847 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
22848 return 0;
22849
22850 /* We still have to do the compare, because isel doesn't do a
22851 compare; it just looks at the CRx bits set by a previous compare
22852 instruction. */
22853 condition_rtx = rs6000_generate_compare (op, mode);
22854 cond_code = GET_CODE (condition_rtx);
22855 cr = XEXP (condition_rtx, 0);
22856 signedp = GET_MODE (cr) == CCmode;
22857
22858 isel_func = (mode == SImode
22859 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
22860 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
22861
22862 switch (cond_code)
22863 {
22864 case LT: case GT: case LTU: case GTU: case EQ:
22865 /* isel handles these directly. */
22866 break;
22867
22868 default:
22869 /* We need to swap the sense of the comparison. */
22870 {
22871 std::swap (false_cond, true_cond);
22872 PUT_CODE (condition_rtx, reverse_condition (cond_code));
22873 }
22874 break;
22875 }
22876
22877 false_cond = force_reg (mode, false_cond);
22878 if (true_cond != const0_rtx)
22879 true_cond = force_reg (mode, true_cond);
22880
22881 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
22882
22883 return 1;
22884 }
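
/* For example, "r = (a < b) ? c : d" in SImode generates a compare setting
   cr0 followed by an isel picking C or D from the LT bit, roughly:
	cmpw 0,ra,rb
	isel rr,rc,rd,0
   Conditions isel cannot test directly (e.g. GE) are handled above by
   reversing the condition and swapping the arms. */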
22885
22886 void
22887 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22888 {
22889 machine_mode mode = GET_MODE (op0);
22890 enum rtx_code c;
22891 rtx target;
22892
22893 /* VSX/altivec have direct min/max insns. */
22894 if ((code == SMAX || code == SMIN)
22895 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
22896 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
22897 {
22898 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
22899 return;
22900 }
22901
22902 if (code == SMAX || code == SMIN)
22903 c = GE;
22904 else
22905 c = GEU;
22906
22907 if (code == SMAX || code == UMAX)
22908 target = emit_conditional_move (dest, c, op0, op1, mode,
22909 op0, op1, mode, 0);
22910 else
22911 target = emit_conditional_move (dest, c, op0, op1, mode,
22912 op1, op0, mode, 0);
22913 gcc_assert (target);
22914 if (target != dest)
22915 emit_move_insn (dest, target);
22916 }
22917
22918 /* A subroutine of the atomic operation splitters. Jump to LABEL if
22919 COND is true. Mark the jump as unlikely to be taken. */
22920
22921 static void
22922 emit_unlikely_jump (rtx cond, rtx label)
22923 {
22924 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
22925 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
22926 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
22927 }
22928
22929 /* A subroutine of the atomic operation splitters. Emit a load-locked
22930 instruction in MODE. For QI/HImode, possibly use a pattern that includes
22931 the zero_extend operation. */
22932
22933 static void
22934 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
22935 {
22936 rtx (*fn) (rtx, rtx) = NULL;
22937
22938 switch (mode)
22939 {
22940 case E_QImode:
22941 fn = gen_load_lockedqi;
22942 break;
22943 case E_HImode:
22944 fn = gen_load_lockedhi;
22945 break;
22946 case E_SImode:
22947 if (GET_MODE (mem) == QImode)
22948 fn = gen_load_lockedqi_si;
22949 else if (GET_MODE (mem) == HImode)
22950 fn = gen_load_lockedhi_si;
22951 else
22952 fn = gen_load_lockedsi;
22953 break;
22954 case E_DImode:
22955 fn = gen_load_lockeddi;
22956 break;
22957 case E_TImode:
22958 fn = gen_load_lockedti;
22959 break;
22960 default:
22961 gcc_unreachable ();
22962 }
22963 emit_insn (fn (reg, mem));
22964 }
22965
22966 /* A subroutine of the atomic operation splitters. Emit a store-conditional
22967 instruction in MODE. */
22968
22969 static void
22970 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
22971 {
22972 rtx (*fn) (rtx, rtx, rtx) = NULL;
22973
22974 switch (mode)
22975 {
22976 case E_QImode:
22977 fn = gen_store_conditionalqi;
22978 break;
22979 case E_HImode:
22980 fn = gen_store_conditionalhi;
22981 break;
22982 case E_SImode:
22983 fn = gen_store_conditionalsi;
22984 break;
22985 case E_DImode:
22986 fn = gen_store_conditionaldi;
22987 break;
22988 case E_TImode:
22989 fn = gen_store_conditionalti;
22990 break;
22991 default:
22992 gcc_unreachable ();
22993 }
22994
22995 /* Emit sync before stwcx. to address PPC405 Erratum. */
22996 if (PPC405_ERRATUM77)
22997 emit_insn (gen_hwsync ());
22998
22999 emit_insn (fn (res, mem, val));
23000 }
23001
23002 /* Expand barriers before and after a load_locked/store_cond sequence. */
23003
23004 static rtx
23005 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
23006 {
23007 rtx addr = XEXP (mem, 0);
23008
23009 if (!legitimate_indirect_address_p (addr, reload_completed)
23010 && !legitimate_indexed_address_p (addr, reload_completed))
23011 {
23012 addr = force_reg (Pmode, addr);
23013 mem = replace_equiv_address_nv (mem, addr);
23014 }
23015
23016 switch (model)
23017 {
23018 case MEMMODEL_RELAXED:
23019 case MEMMODEL_CONSUME:
23020 case MEMMODEL_ACQUIRE:
23021 break;
23022 case MEMMODEL_RELEASE:
23023 case MEMMODEL_ACQ_REL:
23024 emit_insn (gen_lwsync ());
23025 break;
23026 case MEMMODEL_SEQ_CST:
23027 emit_insn (gen_hwsync ());
23028 break;
23029 default:
23030 gcc_unreachable ();
23031 }
23032 return mem;
23033 }
23034
23035 static void
23036 rs6000_post_atomic_barrier (enum memmodel model)
23037 {
23038 switch (model)
23039 {
23040 case MEMMODEL_RELAXED:
23041 case MEMMODEL_CONSUME:
23042 case MEMMODEL_RELEASE:
23043 break;
23044 case MEMMODEL_ACQUIRE:
23045 case MEMMODEL_ACQ_REL:
23046 case MEMMODEL_SEQ_CST:
23047 emit_insn (gen_isync ());
23048 break;
23049 default:
23050 gcc_unreachable ();
23051 }
23052 }
23053
23054 /* A subroutine of the various atomic expanders. For sub-word operations,
23055 we must adjust things to operate on SImode. Given the original MEM,
23056 return a new aligned MEM. Also build and return the quantities by
23057 which to shift and mask. */
23058
23059 static rtx
23060 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
23061 {
23062 rtx addr, align, shift, mask, mem;
23063 HOST_WIDE_INT shift_mask;
23064 machine_mode mode = GET_MODE (orig_mem);
23065
23066 /* For smaller modes, we have to implement this via SImode. */
23067 shift_mask = (mode == QImode ? 0x18 : 0x10);
23068
23069 addr = XEXP (orig_mem, 0);
23070 addr = force_reg (GET_MODE (addr), addr);
23071
23072 /* Generate a new MEM for the aligned word containing the subword. We
23073 do not want any of the existing MEM_ATTR data, as we're now
23074 accessing memory outside the original object. */
23075 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
23076 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23077 mem = gen_rtx_MEM (SImode, align);
23078 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
23079 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
23080 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
23081
23082 /* Shift amount for subword relative to aligned word. */
23083 shift = gen_reg_rtx (SImode);
23084 addr = gen_lowpart (SImode, addr);
23085 rtx tmp = gen_reg_rtx (SImode);
23086 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
23087 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
23088 if (BYTES_BIG_ENDIAN)
23089 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
23090 shift, 1, OPTAB_LIB_WIDEN);
23091 *pshift = shift;
23092
23093 /* Mask for insertion. */
23094 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
23095 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
23096 *pmask = mask;
23097
23098 return mem;
23099 }
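
/* A worked example: for a QImode access at address 0x1002 on a
   little-endian target, ALIGN is 0x1000, SHIFT is (0x1002 << 3) & 0x18
   = 16, and MASK is 0xff << 16, i.e. the byte lives in bits 16..23 of
   the aligned SImode word; on big-endian the XOR with 0x18 flips SHIFT
   to 8. */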
23100
23101 /* A subroutine of the various atomic expanders. For sub-word operands,
23102 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
23103
23104 static rtx
23105 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
23106 {
23107 rtx x;
23108
23109 x = gen_reg_rtx (SImode);
23110 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
23111 gen_rtx_NOT (SImode, mask),
23112 oldval)));
23113
23114 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
23115
23116 return x;
23117 }
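
/* In C terms this computes (OLDVAL & ~MASK) | NEWVAL, where NEWVAL is
   assumed to have been shifted into position with no bits set outside
   MASK, as the callers arrange. */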
23118
23119 /* A subroutine of the various atomic expanders. For sub-word operands,
23120 extract WIDE to NARROW via SHIFT. */
23121
23122 static void
23123 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
23124 {
23125 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
23126 wide, 1, OPTAB_LIB_WIDEN);
23127 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
23128 }
23129
23130 /* Expand an atomic compare and swap operation. */
23131
23132 void
23133 rs6000_expand_atomic_compare_and_swap (rtx operands[])
23134 {
23135 rtx boolval, retval, mem, oldval, newval, cond;
23136 rtx label1, label2, x, mask, shift;
23137 machine_mode mode, orig_mode;
23138 enum memmodel mod_s, mod_f;
23139 bool is_weak;
23140
23141 boolval = operands[0];
23142 retval = operands[1];
23143 mem = operands[2];
23144 oldval = operands[3];
23145 newval = operands[4];
23146 is_weak = (INTVAL (operands[5]) != 0);
23147 mod_s = memmodel_base (INTVAL (operands[6]));
23148 mod_f = memmodel_base (INTVAL (operands[7]));
23149 orig_mode = mode = GET_MODE (mem);
23150
23151 mask = shift = NULL_RTX;
23152 if (mode == QImode || mode == HImode)
23153 {
23154 /* Before power8, we didn't have access to lbarx/lharx, so generate a
23155 lwarx and shift/mask sequence. With power8, we need to do the
23156 comparison in SImode, but the store is still done in QI/HImode. */
23157 oldval = convert_modes (SImode, mode, oldval, 1);
23158
23159 if (!TARGET_SYNC_HI_QI)
23160 {
23161 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23162
23163 /* Shift and mask OLDVAL into position within the word. */
23164 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
23165 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23166
23167 /* Shift and mask NEWVAL into position within the word. */
23168 newval = convert_modes (SImode, mode, newval, 1);
23169 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
23170 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23171 }
23172
23173 /* Prepare to adjust the return value. */
23174 retval = gen_reg_rtx (SImode);
23175 mode = SImode;
23176 }
23177 else if (reg_overlap_mentioned_p (retval, oldval))
23178 oldval = copy_to_reg (oldval);
23179
23180 if (mode != TImode && !reg_or_short_operand (oldval, mode))
23181 oldval = copy_to_mode_reg (mode, oldval);
23182
23183 if (reg_overlap_mentioned_p (retval, newval))
23184 newval = copy_to_reg (newval);
23185
23186 mem = rs6000_pre_atomic_barrier (mem, mod_s);
23187
23188 label1 = NULL_RTX;
23189 if (!is_weak)
23190 {
23191 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23192 emit_label (XEXP (label1, 0));
23193 }
23194 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23195
23196 emit_load_locked (mode, retval, mem);
23197
23198 x = retval;
23199 if (mask)
23200 x = expand_simple_binop (SImode, AND, retval, mask,
23201 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23202
23203 cond = gen_reg_rtx (CCmode);
23204 /* If we have TImode, synthesize a comparison. */
23205 if (mode != TImode)
23206 x = gen_rtx_COMPARE (CCmode, x, oldval);
23207 else
23208 {
23209 rtx xor1_result = gen_reg_rtx (DImode);
23210 rtx xor2_result = gen_reg_rtx (DImode);
23211 rtx or_result = gen_reg_rtx (DImode);
23212 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
23213 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
23214 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
23215 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
23216
23217 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
23218 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
23219 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
23220 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
23221 }
23222
23223 emit_insn (gen_rtx_SET (cond, x));
23224
23225 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23226 emit_unlikely_jump (x, label2);
23227
23228 x = newval;
23229 if (mask)
23230 x = rs6000_mask_atomic_subword (retval, newval, mask);
23231
23232 emit_store_conditional (orig_mode, cond, mem, x);
23233
23234 if (!is_weak)
23235 {
23236 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23237 emit_unlikely_jump (x, label1);
23238 }
23239
23240 if (!is_mm_relaxed (mod_f))
23241 emit_label (XEXP (label2, 0));
23242
23243 rs6000_post_atomic_barrier (mod_s);
23244
23245 if (is_mm_relaxed (mod_f))
23246 emit_label (XEXP (label2, 0));
23247
23248 if (shift)
23249 rs6000_finish_atomic_subword (operands[1], retval, shift);
23250 else if (mode != GET_MODE (operands[1]))
23251 convert_move (operands[1], retval, 1);
23252
23253 /* In all cases, CR0 contains EQ on success, and NE on failure. */
23254 x = gen_rtx_EQ (SImode, cond, const0_rtx);
23255 emit_insn (gen_rtx_SET (boolval, x));
23256 }
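
/* For a strong SImode compare-and-swap with SEQ_CST ordering the emitted
   sequence is roughly

	sync
     1: lwarx  r,0,mem
	cmpw   0,r,oldval
	bne-   0,2f
	stwcx. newval,0,mem
	bne-   0,1b
     2: isync

   with the weak form omitting the retry branch back to 1. */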
23257
23258 /* Expand an atomic exchange operation. */
23259
23260 void
23261 rs6000_expand_atomic_exchange (rtx operands[])
23262 {
23263 rtx retval, mem, val, cond;
23264 machine_mode mode;
23265 enum memmodel model;
23266 rtx label, x, mask, shift;
23267
23268 retval = operands[0];
23269 mem = operands[1];
23270 val = operands[2];
23271 model = memmodel_base (INTVAL (operands[3]));
23272 mode = GET_MODE (mem);
23273
23274 mask = shift = NULL_RTX;
23275 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
23276 {
23277 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23278
23279 /* Shift and mask VAL into position within the word. */
23280 val = convert_modes (SImode, mode, val, 1);
23281 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23282 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23283
23284 /* Prepare to adjust the return value. */
23285 retval = gen_reg_rtx (SImode);
23286 mode = SImode;
23287 }
23288
23289 mem = rs6000_pre_atomic_barrier (mem, model);
23290
23291 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23292 emit_label (XEXP (label, 0));
23293
23294 emit_load_locked (mode, retval, mem);
23295
23296 x = val;
23297 if (mask)
23298 x = rs6000_mask_atomic_subword (retval, val, mask);
23299
23300 cond = gen_reg_rtx (CCmode);
23301 emit_store_conditional (mode, cond, mem, x);
23302
23303 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23304 emit_unlikely_jump (x, label);
23305
23306 rs6000_post_atomic_barrier (model);
23307
23308 if (shift)
23309 rs6000_finish_atomic_subword (operands[0], retval, shift);
23310 }
23311
23312 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23313 to perform. MEM is the memory on which to operate. VAL is the second
23314 operand of the binary operator. BEFORE and AFTER are optional locations to
23315 return the value of MEM either before or after the operation. MODEL_RTX
23316 is a CONST_INT containing the memory model to use. */
23317
23318 void
23319 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23320 rtx orig_before, rtx orig_after, rtx model_rtx)
23321 {
23322 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23323 machine_mode mode = GET_MODE (mem);
23324 machine_mode store_mode = mode;
23325 rtx label, x, cond, mask, shift;
23326 rtx before = orig_before, after = orig_after;
23327
23328 mask = shift = NULL_RTX;
23329 /* On power8, we want to use SImode for the operation. On previous systems,
23330 do the operation on the containing aligned word and shift/mask to get the
23331 proper byte or halfword. */
23332 if (mode == QImode || mode == HImode)
23333 {
23334 if (TARGET_SYNC_HI_QI)
23335 {
23336 val = convert_modes (SImode, mode, val, 1);
23337
23338 /* Prepare to adjust the return value. */
23339 before = gen_reg_rtx (SImode);
23340 if (after)
23341 after = gen_reg_rtx (SImode);
23342 mode = SImode;
23343 }
23344 else
23345 {
23346 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23347
23348 /* Shift and mask VAL into position within the word. */
23349 val = convert_modes (SImode, mode, val, 1);
23350 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23351 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23352
23353 switch (code)
23354 {
23355 case IOR:
23356 case XOR:
23357 /* We've already zero-extended VAL. That is sufficient to
23358 make certain that it does not affect other bits. */
23359 mask = NULL;
23360 break;
23361
23362 case AND:
23363 /* If we make certain that all of the other bits in VAL are
23364 set, that will be sufficient to not affect other bits. */
23365 x = gen_rtx_NOT (SImode, mask);
23366 x = gen_rtx_IOR (SImode, x, val);
23367 emit_insn (gen_rtx_SET (val, x));
23368 mask = NULL;
23369 break;
23370
23371 case NOT:
23372 case PLUS:
23373 case MINUS:
23374 /* These will all affect bits outside the field and need
23375 adjustment via MASK within the loop. */
23376 break;
23377
23378 default:
23379 gcc_unreachable ();
23380 }
23381
23382 /* Prepare to adjust the return value. */
23383 before = gen_reg_rtx (SImode);
23384 if (after)
23385 after = gen_reg_rtx (SImode);
23386 store_mode = mode = SImode;
23387 }
23388 }
23389
23390 mem = rs6000_pre_atomic_barrier (mem, model);
23391
23392 label = gen_label_rtx ();
23393 emit_label (label);
23394 label = gen_rtx_LABEL_REF (VOIDmode, label);
23395
23396 if (before == NULL_RTX)
23397 before = gen_reg_rtx (mode);
23398
23399 emit_load_locked (mode, before, mem);
23400
23401 if (code == NOT)
23402 {
23403 x = expand_simple_binop (mode, AND, before, val,
23404 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23405 after = expand_simple_unop (mode, NOT, x, after, 1);
23406 }
23407 else
23408 {
23409 after = expand_simple_binop (mode, code, before, val,
23410 after, 1, OPTAB_LIB_WIDEN);
23411 }
23412
23413 x = after;
23414 if (mask)
23415 {
23416 x = expand_simple_binop (SImode, AND, after, mask,
23417 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23418 x = rs6000_mask_atomic_subword (before, x, mask);
23419 }
23420 else if (store_mode != mode)
23421 x = convert_modes (store_mode, mode, x, 1);
23422
23423 cond = gen_reg_rtx (CCmode);
23424 emit_store_conditional (store_mode, cond, mem, x);
23425
23426 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23427 emit_unlikely_jump (x, label);
23428
23429 rs6000_post_atomic_barrier (model);
23430
23431 if (shift)
23432 {
23433 /* QImode/HImode on machines without lbarx/lharx, where we do a lwarx and
23434 then do the calculations in an SImode register. */
23435 if (orig_before)
23436 rs6000_finish_atomic_subword (orig_before, before, shift);
23437 if (orig_after)
23438 rs6000_finish_atomic_subword (orig_after, after, shift);
23439 }
23440 else if (store_mode != mode)
23441 {
23442 /* QImode/HImode on machines with lbarx/lharx, where we do the native
23443 operation and then do the calculations in an SImode register. */
23444 if (orig_before)
23445 convert_move (orig_before, before, 1);
23446 if (orig_after)
23447 convert_move (orig_after, after, 1);
23448 }
23449 else if (orig_after && after != orig_after)
23450 emit_move_insn (orig_after, after);
23451 }
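
/* For example, an SImode __atomic_fetch_add with relaxed ordering comes
   out roughly as

     1: lwarx  r,0,mem
	add    s,r,val
	stwcx. s,0,mem
	bne-   0,1b

   where r is the "before" value and s the "after" value; acquire/release
   orderings add the isync/lwsync barriers emitted by the helpers above. */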
23452
23453 /* Emit instructions to move SRC to DST. Called by splitters for
23454 multi-register moves. It will emit at most one instruction for
23455 each register that is accessed; that is, it won't emit li/lis pairs
23456 (or equivalent for 64-bit code). One of SRC or DST must be a hard
23457 register. */
23458
23459 void
23460 rs6000_split_multireg_move (rtx dst, rtx src)
23461 {
23462 /* The register number of the first register being moved. */
23463 int reg;
23464 /* The mode that is to be moved. */
23465 machine_mode mode;
23466 /* The mode that the move is being done in, and its size. */
23467 machine_mode reg_mode;
23468 int reg_mode_size;
23469 /* The number of registers that will be moved. */
23470 int nregs;
23471
23472 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
23473 mode = GET_MODE (dst);
23474 nregs = hard_regno_nregs (reg, mode);
23475 if (FP_REGNO_P (reg))
23476 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
23477 (TARGET_HARD_FLOAT ? DFmode : SFmode);
23478 else if (ALTIVEC_REGNO_P (reg))
23479 reg_mode = V16QImode;
23480 else
23481 reg_mode = word_mode;
23482 reg_mode_size = GET_MODE_SIZE (reg_mode);
23483
23484 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
23485
23486 /* TDmode residing in FP registers is special, since the ISA requires that
23487 the lower-numbered word of a register pair is always the most significant
23488 word, even in little-endian mode. This does not match the usual subreg
23489 semantics, so we cannot use simplify_gen_subreg in those cases. Access
23490 the appropriate constituent registers "by hand" in little-endian mode.
23491
23492 Note we do not need to check for destructive overlap here since TDmode
23493 can only reside in even/odd register pairs. */
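/* For example, a TDmode value in the FPR pair 10/11 on little-endian is
   accessed with subword 0 taken from FPR 11 and subword 1 from FPR 10,
   the reverse of what simplify_gen_subreg would give. */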
23494 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
23495 {
23496 rtx p_src, p_dst;
23497 int i;
23498
23499 for (i = 0; i < nregs; i++)
23500 {
23501 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
23502 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
23503 else
23504 p_src = simplify_gen_subreg (reg_mode, src, mode,
23505 i * reg_mode_size);
23506
23507 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
23508 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
23509 else
23510 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
23511 i * reg_mode_size);
23512
23513 emit_insn (gen_rtx_SET (p_dst, p_src));
23514 }
23515
23516 return;
23517 }
23518
23519 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
23520 {
23521 /* Move register range backwards, if we might have destructive
23522 overlap. */
23523 int i;
23524 for (i = nregs - 1; i >= 0; i--)
23525 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23526 i * reg_mode_size),
23527 simplify_gen_subreg (reg_mode, src, mode,
23528 i * reg_mode_size)));
23529 }
23530 else
23531 {
23532 int i;
23533 int j = -1;
23534 bool used_update = false;
23535 rtx restore_basereg = NULL_RTX;
23536
23537 if (MEM_P (src) && INT_REGNO_P (reg))
23538 {
23539 rtx breg;
23540
23541 if (GET_CODE (XEXP (src, 0)) == PRE_INC
23542 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
23543 {
23544 rtx delta_rtx;
23545 breg = XEXP (XEXP (src, 0), 0);
23546 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
23547 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
23548 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
23549 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23550 src = replace_equiv_address (src, breg);
23551 }
23552 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
23553 {
23554 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
23555 {
23556 rtx basereg = XEXP (XEXP (src, 0), 0);
23557 if (TARGET_UPDATE)
23558 {
23559 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
23560 emit_insn (gen_rtx_SET (ndst,
23561 gen_rtx_MEM (reg_mode,
23562 XEXP (src, 0))));
23563 used_update = true;
23564 }
23565 else
23566 emit_insn (gen_rtx_SET (basereg,
23567 XEXP (XEXP (src, 0), 1)));
23568 src = replace_equiv_address (src, basereg);
23569 }
23570 else
23571 {
23572 rtx basereg = gen_rtx_REG (Pmode, reg);
23573 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
23574 src = replace_equiv_address (src, basereg);
23575 }
23576 }
23577
23578 breg = XEXP (src, 0);
23579 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
23580 breg = XEXP (breg, 0);
23581
23582 /* If the base register we are using to address memory is
23583 also a destination reg, then change that register last. */
23584 if (REG_P (breg)
23585 && REGNO (breg) >= REGNO (dst)
23586 && REGNO (breg) < REGNO (dst) + nregs)
23587 j = REGNO (breg) - REGNO (dst);
23588 }
23589 else if (MEM_P (dst) && INT_REGNO_P (reg))
23590 {
23591 rtx breg;
23592
23593 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
23594 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
23595 {
23596 rtx delta_rtx;
23597 breg = XEXP (XEXP (dst, 0), 0);
23598 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
23599 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
23600 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
23601
23602 /* We have to update the breg before doing the store.
23603 Use store with update, if available. */
23604
23605 if (TARGET_UPDATE)
23606 {
23607 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23608 emit_insn (TARGET_32BIT
23609 ? (TARGET_POWERPC64
23610 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
23611 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
23612 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
23613 used_update = true;
23614 }
23615 else
23616 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23617 dst = replace_equiv_address (dst, breg);
23618 }
23619 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
23620 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
23621 {
23622 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
23623 {
23624 rtx basereg = XEXP (XEXP (dst, 0), 0);
23625 if (TARGET_UPDATE)
23626 {
23627 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23628 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
23629 XEXP (dst, 0)),
23630 nsrc));
23631 used_update = true;
23632 }
23633 else
23634 emit_insn (gen_rtx_SET (basereg,
23635 XEXP (XEXP (dst, 0), 1)));
23636 dst = replace_equiv_address (dst, basereg);
23637 }
23638 else
23639 {
23640 rtx basereg = XEXP (XEXP (dst, 0), 0);
23641 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
23642 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
23643 && REG_P (basereg)
23644 && REG_P (offsetreg)
23645 && REGNO (basereg) != REGNO (offsetreg));
23646 if (REGNO (basereg) == 0)
23647 std::swap (basereg, offsetreg);
23652 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
23653 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
23654 dst = replace_equiv_address (dst, basereg);
23655 }
23656 }
23657 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
23658 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
23659 }
23660
23661 for (i = 0; i < nregs; i++)
23662 {
23663 /* Calculate index to next subword. */
23664 ++j;
23665 if (j == nregs)
23666 j = 0;
23667
23668 /* If the compiler already emitted the move of the first word by a
23669 store with update, there is no need to do anything. */
23670 if (j == 0 && used_update)
23671 continue;
23672
23673 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23674 j * reg_mode_size),
23675 simplify_gen_subreg (reg_mode, src, mode,
23676 j * reg_mode_size)));
23677 }
23678 if (restore_basereg != NULL_RTX)
23679 emit_insn (restore_basereg);
23680 }
23681 }
23682
23683 static GTY(()) alias_set_type set = -1;
23684
23685 alias_set_type
23686 get_TOC_alias_set (void)
23687 {
23688 if (set == -1)
23689 set = new_alias_set ();
23690 return set;
23691 }
23692
23693 /* Return the internal arg pointer used for function incoming
23694 arguments. When -fsplit-stack, the arg pointer is r12 so we need
23695 to copy it to a pseudo in order for it to be preserved over calls
23696 and suchlike. We'd really like to use a pseudo here for the
23697 internal arg pointer but data-flow analysis is not prepared to
23698 accept pseudos as live at the beginning of a function. */
23699
23700 static rtx
23701 rs6000_internal_arg_pointer (void)
23702 {
23703 if (flag_split_stack
23704 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
23705 == NULL))
23706
23707 {
23708 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
23709 {
23710 rtx pat;
23711
23712 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
23713 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
23714
23715 /* Put the pseudo initialization right after the note at the
23716 beginning of the function. */
23717 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
23718 gen_rtx_REG (Pmode, 12));
23719 push_topmost_sequence ();
23720 emit_insn_after (pat, get_insns ());
23721 pop_topmost_sequence ();
23722 }
23723 rtx ret = plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
23724 FIRST_PARM_OFFSET (current_function_decl));
23725 return copy_to_reg (ret);
23726 }
23727 return virtual_incoming_args_rtx;
23728 }
23729
23730 /* We may have to tell the dataflow pass that the split stack prologue
23731 is initializing a register. */
23732
23733 static void
23734 rs6000_live_on_entry (bitmap regs)
23735 {
23736 if (flag_split_stack)
23737 bitmap_set_bit (regs, 12);
23738 }
23739
23740 \f
23741 /* A C compound statement that outputs the assembler code for a thunk
23742 function, used to implement C++ virtual function calls with
23743 multiple inheritance. The thunk acts as a wrapper around a virtual
23744 function, adjusting the implicit object parameter before handing
23745 control off to the real function.
23746
23747 First, emit code to add the integer DELTA to the location that
23748 contains the incoming first argument. Assume that this argument
23749 contains a pointer, and is the one used to pass the `this' pointer
23750 in C++. This is the incoming argument *before* the function
23751 prologue, e.g. `%o0' on a sparc. The addition must preserve the
23752 values of all other incoming arguments.
23753
23754 After the addition, emit code to jump to FUNCTION, which is a
23755 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
23756 not touch the return address. Hence returning from FUNCTION will
23757 return to whoever called the current `thunk'.
23758
23759 The effect must be as if FUNCTION had been called directly with the
23760 adjusted first argument. This macro is responsible for emitting
23761 all of the code for a thunk function; output_function_prologue()
23762 and output_function_epilogue() are not invoked.
23763
23764 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
23765 been extracted from it.) It might possibly be useful on some
23766 targets, but probably not.
23767
23768 If you do not define this macro, the target-independent code in the
23769 C++ frontend will generate a less efficient heavyweight thunk that
23770 calls FUNCTION instead of jumping to it. The generic approach does
23771 not support varargs. */
23772
23773 static void
23774 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
23775 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
23776 tree function)
23777 {
23778 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
23779 rtx this_rtx, funexp;
23780 rtx_insn *insn;
23781
23782 reload_completed = 1;
23783 epilogue_completed = 1;
23784
23785 /* Mark the end of the (empty) prologue. */
23786 emit_note (NOTE_INSN_PROLOGUE_END);
23787
23788 /* Find the "this" pointer. If the function returns a structure,
23789 the structure return pointer is in r3. */
23790 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
23791 this_rtx = gen_rtx_REG (Pmode, 4);
23792 else
23793 this_rtx = gen_rtx_REG (Pmode, 3);
23794
23795 /* Apply the constant offset, if required. */
23796 if (delta)
23797 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
23798
23799 /* Apply the offset from the vtable, if required. */
23800 if (vcall_offset)
23801 {
23802 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
23803 rtx tmp = gen_rtx_REG (Pmode, 12);
23804
23805 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
23806 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
23807 {
23808 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
23809 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
23810 }
23811 else
23812 {
23813 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
23814
23815 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
23816 }
23817 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
23818 }
23819
23820 /* Generate a tail call to the target function. */
23821 if (!TREE_USED (function))
23822 {
23823 assemble_external (function);
23824 TREE_USED (function) = 1;
23825 }
23826 funexp = XEXP (DECL_RTL (function), 0);
23827 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
23828
23829 #if TARGET_MACHO
23830 if (MACHOPIC_INDIRECT)
23831 funexp = machopic_indirect_call_target (funexp);
23832 #endif
23833
23834 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
23835 generate sibcall RTL explicitly. */
23836 insn = emit_call_insn (
23837 gen_rtx_PARALLEL (VOIDmode,
23838 gen_rtvec (3,
23839 gen_rtx_CALL (VOIDmode,
23840 funexp, const0_rtx),
23841 gen_rtx_USE (VOIDmode, const0_rtx),
23842 simple_return_rtx)));
23843 SIBLING_CALL_P (insn) = 1;
23844 emit_barrier ();
23845
23846 /* Run just enough of rest_of_compilation to get the insns emitted.
23847 There's not really enough bulk here to make other passes such as
23848 instruction scheduling worthwhile. */
23849 insn = get_insns ();
23850 shorten_branches (insn);
23851 assemble_start_function (thunk_fndecl, fnname);
23852 final_start_function (insn, file, 1);
23853 final (insn, file, 1);
23854 final_end_function ();
23855 assemble_end_function (thunk_fndecl, fnname);
23856
23857 reload_completed = 0;
23858 epilogue_completed = 0;
23859 }
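
/* For a thunk with a small DELTA and no VCALL_OFFSET this boils down to
   "addi r3,r3,delta" followed by a direct sibling-call branch
   "b function" (with r4 instead of r3 when the function returns an
   aggregate); a nonzero VCALL_OFFSET additionally loads the vtable
   pointer through r12 and adds the fetched adjustment to r3 first. */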
23860 \f
23861 /* A quick summary of the various types of 'constant-pool tables'
23862 under PowerPC:
23863
23864 Target Flags Name One table per
23865 AIX (none) AIX TOC object file
23866 AIX -mfull-toc AIX TOC object file
23867 AIX -mminimal-toc AIX minimal TOC translation unit
23868 SVR4/EABI (none) SVR4 SDATA object file
23869 SVR4/EABI -fpic SVR4 pic object file
23870 SVR4/EABI -fPIC SVR4 PIC translation unit
23871 SVR4/EABI -mrelocatable EABI TOC function
23872 SVR4/EABI -maix AIX TOC object file
23873 SVR4/EABI -maix -mminimal-toc
23874 AIX minimal TOC translation unit
23875
23876 Name Reg. Set by entries contains:
23877 made by addrs? fp? sum?
23878
23879 AIX TOC 2 crt0 as Y option option
23880 AIX minimal TOC 30 prolog gcc Y Y option
23881 SVR4 SDATA 13 crt0 gcc N Y N
23882 SVR4 pic 30 prolog ld Y not yet N
23883 SVR4 PIC 30 prolog gcc Y option option
23884 EABI TOC 30 prolog gcc Y option option
23885
23886 */
23887
23888 /* Hash functions for the hash table. */
23889
23890 static unsigned
23891 rs6000_hash_constant (rtx k)
23892 {
23893 enum rtx_code code = GET_CODE (k);
23894 machine_mode mode = GET_MODE (k);
23895 unsigned result = (code << 3) ^ mode;
23896 const char *format;
23897 int flen, fidx;
23898
23899 format = GET_RTX_FORMAT (code);
23900 flen = strlen (format);
23901 fidx = 0;
23902
23903 switch (code)
23904 {
23905 case LABEL_REF:
23906 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
23907
23908 case CONST_WIDE_INT:
23909 {
23910 int i;
23911 flen = CONST_WIDE_INT_NUNITS (k);
23912 for (i = 0; i < flen; i++)
23913 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
23914 return result;
23915 }
23916
23917 case CONST_DOUBLE:
23918 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
23919
23920 case CODE_LABEL:
23921 fidx = 3;
23922 break;
23923
23924 default:
23925 break;
23926 }
23927
23928 for (; fidx < flen; fidx++)
23929 switch (format[fidx])
23930 {
23931 case 's':
23932 {
23933 unsigned i, len;
23934 const char *str = XSTR (k, fidx);
23935 len = strlen (str);
23936 result = result * 613 + len;
23937 for (i = 0; i < len; i++)
23938 result = result * 613 + (unsigned) str[i];
23939 break;
23940 }
23941 case 'u':
23942 case 'e':
23943 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
23944 break;
23945 case 'i':
23946 case 'n':
23947 result = result * 613 + (unsigned) XINT (k, fidx);
23948 break;
23949 case 'w':
23950 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
23951 result = result * 613 + (unsigned) XWINT (k, fidx);
23952 else
23953 {
23954 size_t i;
23955 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
23956 result = result * 613 + (unsigned) (XWINT (k, fidx)
23957 >> CHAR_BIT * i);
23958 }
23959 break;
23960 case '0':
23961 break;
23962 default:
23963 gcc_unreachable ();
23964 }
23965
23966 return result;
23967 }
23968
23969 hashval_t
23970 toc_hasher::hash (toc_hash_struct *thc)
23971 {
23972 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
23973 }
23974
23975 /* Compare H1 and H2 for equivalence. */
23976
23977 bool
23978 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
23979 {
23980 rtx r1 = h1->key;
23981 rtx r2 = h2->key;
23982
23983 if (h1->key_mode != h2->key_mode)
23984 return 0;
23985
23986 return rtx_equal_p (r1, r2);
23987 }
23988
23989 /* These are the names given by the C++ front-end to vtables, and
23990 vtable-like objects. Ideally, this logic should not be here;
23991 instead, there should be some programmatic way of inquiring as
23992 to whether or not an object is a vtable. */
23993
23994 #define VTABLE_NAME_P(NAME) \
23995 (strncmp ("_vt.", (NAME), strlen ("_vt.")) == 0 \
23996 || strncmp ("_ZTV", (NAME), strlen ("_ZTV")) == 0 \
23997 || strncmp ("_ZTT", (NAME), strlen ("_ZTT")) == 0 \
23998 || strncmp ("_ZTI", (NAME), strlen ("_ZTI")) == 0 \
23999 || strncmp ("_ZTC", (NAME), strlen ("_ZTC")) == 0)
24000
24001 #ifdef NO_DOLLAR_IN_LABEL
24002 /* Return a GGC-allocated character string translating dollar signs in
24003 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
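/* For example, "foo$bar$baz" becomes "foo_bar_baz"; a name containing no
   '$', or one whose first character is '$', is returned unchanged. */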
24004
24005 const char *
24006 rs6000_xcoff_strip_dollar (const char *name)
24007 {
24008 char *strip, *p;
24009 const char *q;
24010 size_t len;
24011
24012 q = (const char *) strchr (name, '$');
24013
24014 if (q == 0 || q == name)
24015 return name;
24016
24017 len = strlen (name);
24018 strip = XALLOCAVEC (char, len + 1);
24019 strcpy (strip, name);
24020 p = strip + (q - name);
24021 while (p)
24022 {
24023 *p = '_';
24024 p = strchr (p + 1, '$');
24025 }
24026
24027 return ggc_alloc_string (strip, len);
24028 }
24029 #endif
24030
24031 void
24032 rs6000_output_symbol_ref (FILE *file, rtx x)
24033 {
24034 const char *name = XSTR (x, 0);
24035
24036 /* Currently C++ toc references to vtables can be emitted before it
24037 is decided whether the vtable is public or private. If this is
24038 the case, then the linker will eventually complain that there is
24039 a TOC reference to an unknown section. Thus, for vtables only,
24040 we emit the TOC reference to reference the symbol and not the
24041 section. */
24042 if (VTABLE_NAME_P (name))
24043 {
24044 RS6000_OUTPUT_BASENAME (file, name);
24045 }
24046 else
24047 assemble_name (file, name);
24048 }
24049
24050 /* Output a TOC entry. We derive the entry name from what is being
24051 written. */
24052
24053 void
24054 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
24055 {
24056 char buf[256];
24057 const char *name = buf;
24058 rtx base = x;
24059 HOST_WIDE_INT offset = 0;
24060
24061 gcc_assert (!TARGET_NO_TOC);
24062
24063 /* When the linker won't eliminate them, don't output duplicate
24064 TOC entries (this happens on AIX if there is any kind of TOC,
24065 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
24066 CODE_LABELs. */
24067 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
24068 {
24069 struct toc_hash_struct *h;
24070
24071 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
24072 time because GGC is not initialized at that point. */
24073 if (toc_hash_table == NULL)
24074 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
24075
24076 h = ggc_alloc<toc_hash_struct> ();
24077 h->key = x;
24078 h->key_mode = mode;
24079 h->labelno = labelno;
24080
24081 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
24082 if (*found == NULL)
24083 *found = h;
24084 else /* This is indeed a duplicate.
24085 Set this label equal to that label. */
24086 {
24087 fputs ("\t.set ", file);
24088 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
24089 fprintf (file, "%d,", labelno);
24090 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
24091 fprintf (file, "%d\n", ((*found)->labelno));
24092
24093 #ifdef HAVE_AS_TLS
24094 if (TARGET_XCOFF && SYMBOL_REF_P (x)
24095 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
24096 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
24097 {
24098 fputs ("\t.set ", file);
24099 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
24100 fprintf (file, "%d,", labelno);
24101 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
24102 fprintf (file, "%d\n", ((*found)->labelno));
24103 }
24104 #endif
24105 return;
24106 }
24107 }
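
/* For example, if the same (mode, constant) pair was first emitted under
   internal label 4 and is requested again as label 7 (numbers
   hypothetical; the exact "LC" spelling is target-dependent), the
   duplicate branch above emits ".set LC..7,LC..4" on XCOFF and returns,
   so only one TOC entry is materialized.  */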
24108
24109 /* If we're going to put a double constant in the TOC, make sure it's
24110 aligned properly when strict alignment is on. */
24111 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
24112 && STRICT_ALIGNMENT
24113 && GET_MODE_BITSIZE (mode) >= 64
24114 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
24115 ASM_OUTPUT_ALIGN (file, 3);
24117
24118 (*targetm.asm_out.internal_label) (file, "LC", labelno);
24119
24120 /* Handle FP constants specially. Note that if we have a minimal
24121 TOC, things we put here aren't actually in the TOC, so we can allow
24122 FP constants. */
24123 if (CONST_DOUBLE_P (x)
24124 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
24125 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
24126 {
24127 long k[4];
24128
24129 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24130 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
24131 else
24132 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
24133
24134 if (TARGET_64BIT)
24135 {
24136 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24137 fputs (DOUBLE_INT_ASM_OP, file);
24138 else
24139 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
24140 k[0] & 0xffffffff, k[1] & 0xffffffff,
24141 k[2] & 0xffffffff, k[3] & 0xffffffff);
24142 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
24143 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
24144 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
24145 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
24146 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
24147 return;
24148 }
24149 else
24150 {
24151 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24152 fputs ("\t.long ", file);
24153 else
24154 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
24155 k[0] & 0xffffffff, k[1] & 0xffffffff,
24156 k[2] & 0xffffffff, k[3] & 0xffffffff);
24157 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
24158 k[0] & 0xffffffff, k[1] & 0xffffffff,
24159 k[2] & 0xffffffff, k[3] & 0xffffffff);
24160 return;
24161 }
24162 }
24163 else if (CONST_DOUBLE_P (x)
24164 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
24165 {
24166 long k[2];
24167
24168 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24169 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
24170 else
24171 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
24172
24173 if (TARGET_64BIT)
24174 {
24175 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24176 fputs (DOUBLE_INT_ASM_OP, file);
24177 else
24178 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
24179 k[0] & 0xffffffff, k[1] & 0xffffffff);
24180 fprintf (file, "0x%lx%08lx\n",
24181 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
24182 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
24183 return;
24184 }
24185 else
24186 {
24187 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24188 fputs ("\t.long ", file);
24189 else
24190 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
24191 k[0] & 0xffffffff, k[1] & 0xffffffff);
24192 fprintf (file, "0x%lx,0x%lx\n",
24193 k[0] & 0xffffffff, k[1] & 0xffffffff);
24194 return;
24195 }
24196 }
24197 else if (CONST_DOUBLE_P (x)
24198 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
24199 {
24200 long l;
24201
24202 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24203 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
24204 else
24205 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
24206
24207 if (TARGET_64BIT)
24208 {
24209 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24210 fputs (DOUBLE_INT_ASM_OP, file);
24211 else
24212 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
24213 if (WORDS_BIG_ENDIAN)
24214 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
24215 else
24216 fprintf (file, "0x%lx\n", l & 0xffffffff);
24217 return;
24218 }
24219 else
24220 {
24221 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24222 fputs ("\t.long ", file);
24223 else
24224 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
24225 fprintf (file, "0x%lx\n", l & 0xffffffff);
24226 return;
24227 }
24228 }
24229 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
24230 {
24231 unsigned HOST_WIDE_INT low;
24232 HOST_WIDE_INT high;
24233
24234 low = INTVAL (x) & 0xffffffff;
24235 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
24236
24237 /* TOC entries are always Pmode-sized, so when big-endian
24238 smaller integer constants in the TOC need to be padded.
24239 (This is still a win over putting the constants in
24240 a separate constant pool, because then we'd have
24241 to have both a TOC entry _and_ the actual constant.)
24242
24243 For a 32-bit target, CONST_INT values are loaded and shifted
24244 entirely within `low' and can be stored in one TOC entry. */
24245
24246 /* It would be easy to make this work, but it doesn't now. */
24247 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
24248
24249 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
24250 {
24251 low |= high << 32;
24252 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
24253 high = (HOST_WIDE_INT) low >> 32;
24254 low &= 0xffffffff;
24255 }
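
/* Worked example (hypothetical values): a 32-bit SImode constant
   0x12345678 headed for a 64-bit big-endian TOC slot arrives with
   low = 0x12345678, high = 0; the shift by POINTER_SIZE - 32 leaves
   high = 0x12345678, low = 0, i.e. the value occupies the most
   significant word of the Pmode-sized entry.  */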
24256
24257 if (TARGET_64BIT)
24258 {
24259 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24260 fputs (DOUBLE_INT_ASM_OP, file);
24261 else
24262 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
24263 (long) high & 0xffffffff, (long) low & 0xffffffff);
24264 fprintf (file, "0x%lx%08lx\n",
24265 (long) high & 0xffffffff, (long) low & 0xffffffff);
24266 return;
24267 }
24268 else
24269 {
24270 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
24271 {
24272 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24273 fputs ("\t.long ", file);
24274 else
24275 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
24276 (long) high & 0xffffffff, (long) low & 0xffffffff);
24277 fprintf (file, "0x%lx,0x%lx\n",
24278 (long) high & 0xffffffff, (long) low & 0xffffffff);
24279 }
24280 else
24281 {
24282 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24283 fputs ("\t.long ", file);
24284 else
24285 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
24286 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
24287 }
24288 return;
24289 }
24290 }
24291
24292 if (GET_CODE (x) == CONST)
24293 {
24294 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
24295 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
24296
24297 base = XEXP (XEXP (x, 0), 0);
24298 offset = INTVAL (XEXP (XEXP (x, 0), 1));
24299 }
24300
24301 switch (GET_CODE (base))
24302 {
24303 case SYMBOL_REF:
24304 name = XSTR (base, 0);
24305 break;
24306
24307 case LABEL_REF:
24308 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
24309 CODE_LABEL_NUMBER (XEXP (base, 0)));
24310 break;
24311
24312 case CODE_LABEL:
24313 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
24314 break;
24315
24316 default:
24317 gcc_unreachable ();
24318 }
24319
24320 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24321 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
24322 else
24323 {
24324 fputs ("\t.tc ", file);
24325 RS6000_OUTPUT_BASENAME (file, name);
24326
24327 if (offset < 0)
24328 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
24329 else if (offset)
24330 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
24331
24332 /* Mark large TOC symbols on AIX with [TE] so they are mapped
24333 after other TOC symbols, reducing overflow of small TOC access
24334 to [TC] symbols. */
24335 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
24336 ? "[TE]," : "[TC],", file);
24337 }
24338
24339 /* Currently C++ toc references to vtables can be emitted before it
24340 is decided whether the vtable is public or private. If this is
24341 the case, then the linker will eventually complain that there is
24342 a TOC reference to an unknown section. Thus, for vtables only,
24343 we emit the TOC reference to reference the symbol and not the
24344 section. */
24345 if (VTABLE_NAME_P (name))
24346 {
24347 RS6000_OUTPUT_BASENAME (file, name);
24348 if (offset < 0)
24349 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
24350 else if (offset > 0)
24351 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
24352 }
24353 else
24354 output_addr_const (file, x);
24355
24356 #if HAVE_AS_TLS
24357 if (TARGET_XCOFF && SYMBOL_REF_P (base))
24358 {
24359 switch (SYMBOL_REF_TLS_MODEL (base))
24360 {
24361 case 0:
24362 break;
24363 case TLS_MODEL_LOCAL_EXEC:
24364 fputs ("@le", file);
24365 break;
24366 case TLS_MODEL_INITIAL_EXEC:
24367 fputs ("@ie", file);
24368 break;
24369 /* Use global-dynamic for local-dynamic. */
24370 case TLS_MODEL_GLOBAL_DYNAMIC:
24371 case TLS_MODEL_LOCAL_DYNAMIC:
24372 putc ('\n', file);
24373 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
24374 fputs ("\t.tc .", file);
24375 RS6000_OUTPUT_BASENAME (file, name);
24376 fputs ("[TC],", file);
24377 output_addr_const (file, x);
24378 fputs ("@m", file);
24379 break;
24380 default:
24381 gcc_unreachable ();
24382 }
24383 }
24384 #endif
24385
24386 putc ('\n', file);
24387 }
24388 \f
24389 /* Output an assembler pseudo-op to write an ASCII string of N characters
24390 starting at P to FILE.
24391
24392 On the RS/6000, we have to do this using the .byte operation and
24393 write out special characters outside the quoted string.
24394 Also, the assembler is broken; very long strings are truncated,
24395 so we must artificially break them up early. */
24396
24397 void
24398 output_ascii (FILE *file, const char *p, int n)
24399 {
24400 char c;
24401 int i, count_string;
24402 const char *for_string = "\t.byte \"";
24403 const char *for_decimal = "\t.byte ";
24404 const char *to_close = NULL;
24405
24406 count_string = 0;
24407 for (i = 0; i < n; i++)
24408 {
24409 c = *p++;
24410 if (c >= ' ' && c < 0177)
24411 {
24412 if (for_string)
24413 fputs (for_string, file);
24414 putc (c, file);
24415
24416 /* Write two quotes to get one. */
24417 if (c == '"')
24418 {
24419 putc (c, file);
24420 ++count_string;
24421 }
24422
24423 for_string = NULL;
24424 for_decimal = "\"\n\t.byte ";
24425 to_close = "\"\n";
24426 ++count_string;
24427
24428 if (count_string >= 512)
24429 {
24430 fputs (to_close, file);
24431
24432 for_string = "\t.byte \"";
24433 for_decimal = "\t.byte ";
24434 to_close = NULL;
24435 count_string = 0;
24436 }
24437 }
24438 else
24439 {
24440 if (for_decimal)
24441 fputs (for_decimal, file);
24442 fprintf (file, "%d", c);
24443
24444 for_string = "\n\t.byte \"";
24445 for_decimal = ", ";
24446 to_close = "\n";
24447 count_string = 0;
24448 }
24449 }
24450
24451 /* Now close the string if we have written one. Then end the line. */
24452 if (to_close)
24453 fputs (to_close, file);
24454 }
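
/* Example: output_ascii (file, "ab\ncd", 5) emits
	.byte "ab"
	.byte 10
	.byte "cd"
   since '\n' (decimal 10) is outside the printable range.  */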
24455 \f
24456 /* Generate a unique section name for FILENAME for a section type
24457 represented by SECTION_DESC. Output goes into BUF.
24458
24459 SECTION_DESC can be any string, as long as it is different for each
24460 possible section type.
24461
24462 We name the section in the same manner as xlc. The name begins with an
24463 underscore followed by the filename (after stripping any leading directory
24464 names) with the last period replaced by the string SECTION_DESC. If
24465 FILENAME does not contain a period, SECTION_DESC is appended to the end of
24466 the name. */
24467
24468 void
24469 rs6000_gen_section_name (char **buf, const char *filename,
24470 const char *section_desc)
24471 {
24472 const char *q, *after_last_slash, *last_period = 0;
24473 char *p;
24474 int len;
24475
24476 after_last_slash = filename;
24477 for (q = filename; *q; q++)
24478 {
24479 if (*q == '/')
24480 after_last_slash = q + 1;
24481 else if (*q == '.')
24482 last_period = q;
24483 }
24484
24485 len = strlen (after_last_slash) + strlen (section_desc) + 2;
24486 *buf = (char *) xmalloc (len);
24487
24488 p = *buf;
24489 *p++ = '_';
24490
24491 for (q = after_last_slash; *q; q++)
24492 {
24493 if (q == last_period)
24494 {
24495 strcpy (p, section_desc);
24496 p += strlen (section_desc);
24497 break;
24498 }
24499
24500 else if (ISALNUM (*q))
24501 *p++ = *q;
24502 }
24503
24504 if (last_period == 0)
24505 strcpy (p, section_desc);
24506 else
24507 *p = '\0';
24508 }
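
/* Example (hypothetical arguments): rs6000_gen_section_name (&buf,
   "src/foo.c", "_bss_") yields "_foo_bss_": the directory prefix is
   stripped, an underscore is prepended, non-alphanumeric characters are
   dropped, and everything from the last period onward is replaced by
   SECTION_DESC.  */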
24509 \f
24510 /* Emit profile function. */
24511
24512 void
24513 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
24514 {
24515 /* Non-standard profiling for kernels, which just saves LR then calls
24516 _mcount without worrying about arg saves. The idea is to change
24517 the function prologue as little as possible as it isn't easy to
24518 account for arg save/restore code added just for _mcount. */
24519 if (TARGET_PROFILE_KERNEL)
24520 return;
24521
24522 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24523 {
24524 #ifndef NO_PROFILE_COUNTERS
24525 # define NO_PROFILE_COUNTERS 0
24526 #endif
24527 if (NO_PROFILE_COUNTERS)
24528 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
24529 LCT_NORMAL, VOIDmode);
24530 else
24531 {
24532 char buf[30];
24533 const char *label_name;
24534 rtx fun;
24535
24536 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
24537 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
24538 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
24539
24540 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
24541 LCT_NORMAL, VOIDmode, fun, Pmode);
24542 }
24543 }
24544 else if (DEFAULT_ABI == ABI_DARWIN)
24545 {
24546 const char *mcount_name = RS6000_MCOUNT;
24547 int caller_addr_regno = LR_REGNO;
24548
24549 /* Be conservative and always set this, at least for now. */
24550 crtl->uses_pic_offset_table = 1;
24551
24552 #if TARGET_MACHO
24553 /* For PIC code, set up a stub and collect the caller's address
24554 from r0, which is where the prologue puts it. */
24555 if (MACHOPIC_INDIRECT
24556 && crtl->uses_pic_offset_table)
24557 caller_addr_regno = 0;
24558 #endif
24559 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
24560 LCT_NORMAL, VOIDmode,
24561 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
24562 }
24563 }
24564
24565 /* Write function profiler code. */
24566
24567 void
24568 output_function_profiler (FILE *file, int labelno)
24569 {
24570 char buf[100];
24571
24572 switch (DEFAULT_ABI)
24573 {
24574 default:
24575 gcc_unreachable ();
24576
24577 case ABI_V4:
24578 if (!TARGET_32BIT)
24579 {
24580 warning (0, "no profiling of 64-bit code for this ABI");
24581 return;
24582 }
24583 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
24584 fprintf (file, "\tmflr %s\n", reg_names[0]);
24585 if (NO_PROFILE_COUNTERS)
24586 {
24587 asm_fprintf (file, "\tstw %s,4(%s)\n",
24588 reg_names[0], reg_names[1]);
24589 }
24590 else if (TARGET_SECURE_PLT && flag_pic)
24591 {
24592 if (TARGET_LINK_STACK)
24593 {
24594 char name[32];
24595 get_ppc476_thunk_name (name);
24596 asm_fprintf (file, "\tbl %s\n", name);
24597 }
24598 else
24599 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
24600 asm_fprintf (file, "\tstw %s,4(%s)\n",
24601 reg_names[0], reg_names[1]);
24602 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
24603 asm_fprintf (file, "\taddis %s,%s,",
24604 reg_names[12], reg_names[12]);
24605 assemble_name (file, buf);
24606 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
24607 assemble_name (file, buf);
24608 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
24609 }
24610 else if (flag_pic == 1)
24611 {
24612 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
24613 asm_fprintf (file, "\tstw %s,4(%s)\n",
24614 reg_names[0], reg_names[1]);
24615 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
24616 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
24617 assemble_name (file, buf);
24618 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
24619 }
24620 else if (flag_pic > 1)
24621 {
24622 asm_fprintf (file, "\tstw %s,4(%s)\n",
24623 reg_names[0], reg_names[1]);
24624 /* Now, we need to get the address of the label. */
24625 if (TARGET_LINK_STACK)
24626 {
24627 char name[32];
24628 get_ppc476_thunk_name (name);
24629 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
24630 assemble_name (file, buf);
24631 fputs ("-.\n1:", file);
24632 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
24633 asm_fprintf (file, "\taddi %s,%s,4\n",
24634 reg_names[11], reg_names[11]);
24635 }
24636 else
24637 {
24638 fputs ("\tbcl 20,31,1f\n\t.long ", file);
24639 assemble_name (file, buf);
24640 fputs ("-.\n1:", file);
24641 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
24642 }
24643 asm_fprintf (file, "\tlwz %s,0(%s)\n",
24644 reg_names[0], reg_names[11]);
24645 asm_fprintf (file, "\tadd %s,%s,%s\n",
24646 reg_names[0], reg_names[0], reg_names[11]);
24647 }
24648 else
24649 {
24650 asm_fprintf (file, "\tlis %s,", reg_names[12]);
24651 assemble_name (file, buf);
24652 fputs ("@ha\n", file);
24653 asm_fprintf (file, "\tstw %s,4(%s)\n",
24654 reg_names[0], reg_names[1]);
24655 asm_fprintf (file, "\tla %s,", reg_names[0]);
24656 assemble_name (file, buf);
24657 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
24658 }
24659
24660 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
24661 fprintf (file, "\tbl %s%s\n",
24662 RS6000_MCOUNT, flag_pic ? "@plt" : "");
24663 break;
24664
24665 case ABI_AIX:
24666 case ABI_ELFv2:
24667 case ABI_DARWIN:
24668 /* Don't do anything, done in output_profile_hook (). */
24669 break;
24670 }
24671 }
24672
24673 \f
24674
24675 /* The following variable value is the last issued insn. */
24676
24677 static rtx_insn *last_scheduled_insn;
24678
24679 /* The following variable helps to balance issuing of load and
24680 store instructions. */
24681
24682 static int load_store_pendulum;
24683
24684 /* The following variable helps pair divide insns during scheduling. */
24685 static int divide_cnt;
24686 /* The following variable helps pair and alternate vector and vector load
24687 insns during scheduling. */
24688 static int vec_pairing;
24689
24690
24691 /* Power4 load update and store update instructions are cracked into a
24692 load or store and an integer insn which are executed in the same cycle.
24693 Branches have their own dispatch slot which does not count against the
24694 GCC issue rate, but it changes the program flow so there are no other
24695 instructions to issue in this cycle. */
24696
24697 static int
24698 rs6000_variable_issue_1 (rtx_insn *insn, int more)
24699 {
24700 last_scheduled_insn = insn;
24701 if (GET_CODE (PATTERN (insn)) == USE
24702 || GET_CODE (PATTERN (insn)) == CLOBBER)
24703 {
24704 cached_can_issue_more = more;
24705 return cached_can_issue_more;
24706 }
24707
24708 if (insn_terminates_group_p (insn, current_group))
24709 {
24710 cached_can_issue_more = 0;
24711 return cached_can_issue_more;
24712 }
24713
24714 /* If the insn is not recognized, it has no reservation; leave the issue count unchanged. */
24715 if (recog_memoized (insn) < 0)
24716 return more;
24717
24718 if (rs6000_sched_groups)
24719 {
24720 if (is_microcoded_insn (insn))
24721 cached_can_issue_more = 0;
24722 else if (is_cracked_insn (insn))
24723 cached_can_issue_more = more > 2 ? more - 2 : 0;
24724 else
24725 cached_can_issue_more = more - 1;
24726
24727 return cached_can_issue_more;
24728 }
24729
24730 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
24731 return 0;
24732
24733 cached_can_issue_more = more - 1;
24734 return cached_can_issue_more;
24735 }
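
/* E.g. with MORE == 4 on a dispatch-group target (rs6000_sched_groups):
   a microcoded insn ends the group and leaves 0 issue slots, a cracked
   insn consumes two slots (leaving 2), and any other recognized insn
   consumes one (leaving 3).  */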
24736
24737 static int
24738 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
24739 {
24740 int r = rs6000_variable_issue_1 (insn, more);
24741 if (verbose)
24742 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
24743 return r;
24744 }
24745
24746 /* Adjust the cost of a scheduling dependency. Return the new cost of
24747 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
24748
24749 static int
24750 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
24751 unsigned int)
24752 {
24753 enum attr_type attr_type;
24754
24755 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
24756 return cost;
24757
24758 switch (dep_type)
24759 {
24760 case REG_DEP_TRUE:
24761 {
24762 /* Data dependency; DEP_INSN writes a register that INSN reads
24763 some cycles later. */
24764
24765 /* Separate a load from a narrower, dependent store. */
24766 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
24767 || rs6000_tune == PROCESSOR_FUTURE)
24768 && GET_CODE (PATTERN (insn)) == SET
24769 && GET_CODE (PATTERN (dep_insn)) == SET
24770 && MEM_P (XEXP (PATTERN (insn), 1))
24771 && MEM_P (XEXP (PATTERN (dep_insn), 0))
24772 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
24773 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
24774 return cost + 14;
24775
24776 attr_type = get_attr_type (insn);
24777
24778 switch (attr_type)
24779 {
24780 case TYPE_JMPREG:
24781 /* Tell the first scheduling pass about the latency between
24782 a mtctr and bctr (and mtlr and br/blr). The first
24783 scheduling pass will not know about this latency since
24784 the mtctr instruction, which has the latency associated
24785 to it, will be generated by reload. */
24786 return 4;
24787 case TYPE_BRANCH:
24788 /* Leave some extra cycles between a compare and its
24789 dependent branch, to inhibit expensive mispredicts. */
24790 if ((rs6000_tune == PROCESSOR_PPC603
24791 || rs6000_tune == PROCESSOR_PPC604
24792 || rs6000_tune == PROCESSOR_PPC604e
24793 || rs6000_tune == PROCESSOR_PPC620
24794 || rs6000_tune == PROCESSOR_PPC630
24795 || rs6000_tune == PROCESSOR_PPC750
24796 || rs6000_tune == PROCESSOR_PPC7400
24797 || rs6000_tune == PROCESSOR_PPC7450
24798 || rs6000_tune == PROCESSOR_PPCE5500
24799 || rs6000_tune == PROCESSOR_PPCE6500
24800 || rs6000_tune == PROCESSOR_POWER4
24801 || rs6000_tune == PROCESSOR_POWER5
24802 || rs6000_tune == PROCESSOR_POWER7
24803 || rs6000_tune == PROCESSOR_POWER8
24804 || rs6000_tune == PROCESSOR_POWER9
24805 || rs6000_tune == PROCESSOR_FUTURE
24806 || rs6000_tune == PROCESSOR_CELL)
24807 && recog_memoized (dep_insn)
24808 && (INSN_CODE (dep_insn) >= 0))
24809
24810 switch (get_attr_type (dep_insn))
24811 {
24812 case TYPE_CMP:
24813 case TYPE_FPCOMPARE:
24814 case TYPE_CR_LOGICAL:
24815 return cost + 2;
24816 case TYPE_EXTS:
24817 case TYPE_MUL:
24818 if (get_attr_dot (dep_insn) == DOT_YES)
24819 return cost + 2;
24820 else
24821 break;
24822 case TYPE_SHIFT:
24823 if (get_attr_dot (dep_insn) == DOT_YES
24824 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
24825 return cost + 2;
24826 else
24827 break;
24828 default:
24829 break;
24830 }
24831 break;
24832
24833 case TYPE_STORE:
24834 case TYPE_FPSTORE:
24835 if ((rs6000_tune == PROCESSOR_POWER6)
24836 && recog_memoized (dep_insn)
24837 && (INSN_CODE (dep_insn) >= 0))
24838 {
24839
24840 if (GET_CODE (PATTERN (insn)) != SET)
24841 /* If this happens, we have to extend this to schedule
24842 optimally. Return default for now. */
24843 return cost;
24844
24845 /* Adjust the cost for the case where the value written
24846 by a fixed point operation is used as the address
24847 gen value on a store. */
24848 switch (get_attr_type (dep_insn))
24849 {
24850 case TYPE_LOAD:
24851 case TYPE_CNTLZ:
24852 {
24853 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24854 return get_attr_sign_extend (dep_insn)
24855 == SIGN_EXTEND_YES ? 6 : 4;
24856 break;
24857 }
24858 case TYPE_SHIFT:
24859 {
24860 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24861 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
24862 6 : 3;
24863 break;
24864 }
24865 case TYPE_INTEGER:
24866 case TYPE_ADD:
24867 case TYPE_LOGICAL:
24868 case TYPE_EXTS:
24869 case TYPE_INSERT:
24870 {
24871 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24872 return 3;
24873 break;
24874 }
24875 case TYPE_STORE:
24876 case TYPE_FPLOAD:
24877 case TYPE_FPSTORE:
24878 {
24879 if (get_attr_update (dep_insn) == UPDATE_YES
24880 && ! rs6000_store_data_bypass_p (dep_insn, insn))
24881 return 3;
24882 break;
24883 }
24884 case TYPE_MUL:
24885 {
24886 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24887 return 17;
24888 break;
24889 }
24890 case TYPE_DIV:
24891 {
24892 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24893 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
24894 break;
24895 }
24896 default:
24897 break;
24898 }
24899 }
24900 break;
24901
24902 case TYPE_LOAD:
24903 if ((rs6000_tune == PROCESSOR_POWER6)
24904 && recog_memoized (dep_insn)
24905 && (INSN_CODE (dep_insn) >= 0))
24906 {
24907
24908 /* Adjust the cost for the case where the value written
24909 by a fixed point instruction is used within the address
24910 gen portion of a subsequent load(u)(x). */
24911 switch (get_attr_type (dep_insn))
24912 {
24913 case TYPE_LOAD:
24914 case TYPE_CNTLZ:
24915 {
24916 if (set_to_load_agen (dep_insn, insn))
24917 return get_attr_sign_extend (dep_insn)
24918 == SIGN_EXTEND_YES ? 6 : 4;
24919 break;
24920 }
24921 case TYPE_SHIFT:
24922 {
24923 if (set_to_load_agen (dep_insn, insn))
24924 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
24925 6 : 3;
24926 break;
24927 }
24928 case TYPE_INTEGER:
24929 case TYPE_ADD:
24930 case TYPE_LOGICAL:
24931 case TYPE_EXTS:
24932 case TYPE_INSERT:
24933 {
24934 if (set_to_load_agen (dep_insn, insn))
24935 return 3;
24936 break;
24937 }
24938 case TYPE_STORE:
24939 case TYPE_FPLOAD:
24940 case TYPE_FPSTORE:
24941 {
24942 if (get_attr_update (dep_insn) == UPDATE_YES
24943 && set_to_load_agen (dep_insn, insn))
24944 return 3;
24945 break;
24946 }
24947 case TYPE_MUL:
24948 {
24949 if (set_to_load_agen (dep_insn, insn))
24950 return 17;
24951 break;
24952 }
24953 case TYPE_DIV:
24954 {
24955 if (set_to_load_agen (dep_insn, insn))
24956 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
24957 break;
24958 }
24959 default:
24960 break;
24961 }
24962 }
24963 break;
24964
24965 case TYPE_FPLOAD:
24966 if ((rs6000_tune == PROCESSOR_POWER6)
24967 && get_attr_update (insn) == UPDATE_NO
24968 && recog_memoized (dep_insn)
24969 && (INSN_CODE (dep_insn) >= 0)
24970 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
24971 return 2;
24972
24973 default:
24974 break;
24975 }
24976
24977 /* Fall out to return default cost. */
24978 }
24979 break;
24980
24981 case REG_DEP_OUTPUT:
24982 /* Output dependency; DEP_INSN writes a register that INSN writes some
24983 cycles later. */
24984 if ((rs6000_tune == PROCESSOR_POWER6)
24985 && recog_memoized (dep_insn)
24986 && (INSN_CODE (dep_insn) >= 0))
24987 {
24988 attr_type = get_attr_type (insn);
24989
24990 switch (attr_type)
24991 {
24992 case TYPE_FP:
24993 case TYPE_FPSIMPLE:
24994 if (get_attr_type (dep_insn) == TYPE_FP
24995 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
24996 return 1;
24997 break;
24998 case TYPE_FPLOAD:
24999 if (get_attr_update (insn) == UPDATE_NO
25000 && get_attr_type (dep_insn) == TYPE_MFFGPR)
25001 return 2;
25002 break;
25003 default:
25004 break;
25005 }
25006 }
25007 /* Fall through, no cost for output dependency. */
25008 /* FALLTHRU */
25009
25010 case REG_DEP_ANTI:
25011 /* Anti dependency; DEP_INSN reads a register that INSN writes some
25012 cycles later. */
25013 return 0;
25014
25015 default:
25016 gcc_unreachable ();
25017 }
25018
25019 return cost;
25020 }
25021
25022 /* Debug version of rs6000_adjust_cost. */
25023
25024 static int
25025 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
25026 int cost, unsigned int dw)
25027 {
25028 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
25029
25030 if (ret != cost)
25031 {
25032 const char *dep;
25033
25034 switch (dep_type)
25035 {
25036 default: dep = "unknown dependency"; break;
25037 case REG_DEP_TRUE: dep = "data dependency"; break;
25038 case REG_DEP_OUTPUT: dep = "output dependency"; break;
25039 case REG_DEP_ANTI: dep = "anti dependency"; break;
25040 }
25041
25042 fprintf (stderr,
25043 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
25044 "%s, insn:\n", ret, cost, dep);
25045
25046 debug_rtx (insn);
25047 }
25048
25049 return ret;
25050 }
25051
25052 /* The function returns true if INSN is microcoded.
25053 Return false otherwise. */
25054
25055 static bool
25056 is_microcoded_insn (rtx_insn *insn)
25057 {
25058 if (!insn || !NONDEBUG_INSN_P (insn)
25059 || GET_CODE (PATTERN (insn)) == USE
25060 || GET_CODE (PATTERN (insn)) == CLOBBER)
25061 return false;
25062
25063 if (rs6000_tune == PROCESSOR_CELL)
25064 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
25065
25066 if (rs6000_sched_groups
25067 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
25068 {
25069 enum attr_type type = get_attr_type (insn);
25070 if ((type == TYPE_LOAD
25071 && get_attr_update (insn) == UPDATE_YES
25072 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
25073 || ((type == TYPE_LOAD || type == TYPE_STORE)
25074 && get_attr_update (insn) == UPDATE_YES
25075 && get_attr_indexed (insn) == INDEXED_YES)
25076 || type == TYPE_MFCR)
25077 return true;
25078 }
25079
25080 return false;
25081 }
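
/* For example, on Power4/5 a sign-extending load with update such as
   lhau matches the first clause, and an update-form indexed load such as
   lwzux matches the second (mnemonics for illustration); both are treated
   as microcoded and so end their dispatch group.  */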
25082
25083 /* The function returns true if INSN is cracked into 2 instructions
25084 by the processor (and therefore occupies 2 issue slots). */
25085
25086 static bool
25087 is_cracked_insn (rtx_insn *insn)
25088 {
25089 if (!insn || !NONDEBUG_INSN_P (insn)
25090 || GET_CODE (PATTERN (insn)) == USE
25091 || GET_CODE (PATTERN (insn)) == CLOBBER)
25092 return false;
25093
25094 if (rs6000_sched_groups
25095 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
25096 {
25097 enum attr_type type = get_attr_type (insn);
25098 if ((type == TYPE_LOAD
25099 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
25100 && get_attr_update (insn) == UPDATE_NO)
25101 || (type == TYPE_LOAD
25102 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
25103 && get_attr_update (insn) == UPDATE_YES
25104 && get_attr_indexed (insn) == INDEXED_NO)
25105 || (type == TYPE_STORE
25106 && get_attr_update (insn) == UPDATE_YES
25107 && get_attr_indexed (insn) == INDEXED_NO)
25108 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
25109 && get_attr_update (insn) == UPDATE_YES)
25110 || (type == TYPE_CR_LOGICAL
25111 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
25112 || (type == TYPE_EXTS
25113 && get_attr_dot (insn) == DOT_YES)
25114 || (type == TYPE_SHIFT
25115 && get_attr_dot (insn) == DOT_YES
25116 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
25117 || (type == TYPE_MUL
25118 && get_attr_dot (insn) == DOT_YES)
25119 || type == TYPE_DIV
25120 || (type == TYPE_INSERT
25121 && get_attr_size (insn) == SIZE_32))
25122 return true;
25123 }
25124
25125 return false;
25126 }
25127
25128 /* The function returns true if INSN can be issued only from
25129 the branch slot. */
25130
25131 static bool
25132 is_branch_slot_insn (rtx_insn *insn)
25133 {
25134 if (!insn || !NONDEBUG_INSN_P (insn)
25135 || GET_CODE (PATTERN (insn)) == USE
25136 || GET_CODE (PATTERN (insn)) == CLOBBER)
25137 return false;
25138
25139 if (rs6000_sched_groups)
25140 {
25141 enum attr_type type = get_attr_type (insn);
25142 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
25143 return true;
25144 return false;
25145 }
25146
25147 return false;
25148 }
25149
25150 /* The function returns true if OUT_INSN sets a value that is
25151 used in the address generation computation of IN_INSN. */
25152 static bool
25153 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
25154 {
25155 rtx out_set, in_set;
25156
25157 /* For performance reasons, only handle the simple case where
25158 both insns are a single_set. */
25159 out_set = single_set (out_insn);
25160 if (out_set)
25161 {
25162 in_set = single_set (in_insn);
25163 if (in_set)
25164 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
25165 }
25166
25167 return false;
25168 }
25169
25170 /* Try to determine base/offset/size parts of the given MEM.
25171 Return true if successful, false if any of the values couldn't
25172 be determined.
25173
25174 This function only looks for REG or REG+CONST address forms.
25175 REG+REG address form will return false. */
25176
25177 static bool
25178 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
25179 HOST_WIDE_INT *size)
25180 {
25181 rtx addr_rtx;
25182 if (MEM_SIZE_KNOWN_P (mem))
25183 *size = MEM_SIZE (mem);
25184 else
25185 return false;
25186
25187 addr_rtx = (XEXP (mem, 0));
25188 if (GET_CODE (addr_rtx) == PRE_MODIFY)
25189 addr_rtx = XEXP (addr_rtx, 1);
25190
25191 *offset = 0;
25192 while (GET_CODE (addr_rtx) == PLUS
25193 && CONST_INT_P (XEXP (addr_rtx, 1)))
25194 {
25195 *offset += INTVAL (XEXP (addr_rtx, 1));
25196 addr_rtx = XEXP (addr_rtx, 0);
25197 }
25198 if (!REG_P (addr_rtx))
25199 return false;
25200
25201 *base = addr_rtx;
25202 return true;
25203 }
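
/* Example: for (mem:DI (plus:DI (reg:DI 9) (const_int 16))) with a known
   size, this sets *BASE to the register, *OFFSET to 16 and *SIZE to 8;
   an indexed reg+reg address makes the function return false instead.  */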
25204
25205 /* Return true if the target storage location of MEM1 is adjacent
25206 to the target storage location of MEM2. */
25208
25209 static bool
25210 adjacent_mem_locations (rtx mem1, rtx mem2)
25211 {
25212 rtx reg1, reg2;
25213 HOST_WIDE_INT off1, size1, off2, size2;
25214
25215 if (get_memref_parts (mem1, &reg1, &off1, &size1)
25216 && get_memref_parts (mem2, &reg2, &off2, &size2))
25217 return ((REGNO (reg1) == REGNO (reg2))
25218 && ((off1 + size1 == off2)
25219 || (off2 + size2 == off1)));
25220
25221 return false;
25222 }
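
/* E.g. two 8-byte accesses at r9+0 and r9+8 are adjacent (0 + 8 == 8),
   while accesses at r9+0 and r9+16 are not.  */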
25223
25224 /* This function returns true if it can be determined that the two MEM
25225 locations overlap by at least 1 byte based on base reg/offset/size. */
25226
25227 static bool
25228 mem_locations_overlap (rtx mem1, rtx mem2)
25229 {
25230 rtx reg1, reg2;
25231 HOST_WIDE_INT off1, size1, off2, size2;
25232
25233 if (get_memref_parts (mem1, &reg1, &off1, &size1)
25234 && get_memref_parts (mem2, &reg2, &off2, &size2))
25235 return ((REGNO (reg1) == REGNO (reg2))
25236 && (((off1 <= off2) && (off1 + size1 > off2))
25237 || ((off2 <= off1) && (off2 + size2 > off1))));
25238
25239 return false;
25240 }
25241
25242 /* A C statement (sans semicolon) to update the integer scheduling
25243 priority INSN_PRIORITY (INSN). Increase the priority to execute the
25244 INSN earlier, reduce the priority to execute INSN later. Do not
25245 define this macro if you do not need to adjust the scheduling
25246 priorities of insns. */
25247
25248 static int
25249 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
25250 {
25251 rtx load_mem, str_mem;
25252 /* On machines (like the 750) which have asymmetric integer units,
25253 where one integer unit can do multiply and divides and the other
25254 can't, reduce the priority of multiply/divide so it is scheduled
25255 before other integer operations. */
25256
25257 #if 0
25258 if (! INSN_P (insn))
25259 return priority;
25260
25261 if (GET_CODE (PATTERN (insn)) == USE)
25262 return priority;
25263
25264 switch (rs6000_tune) {
25265 case PROCESSOR_PPC750:
25266 switch (get_attr_type (insn))
25267 {
25268 default:
25269 break;
25270
25271 case TYPE_MUL:
25272 case TYPE_DIV:
25273 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
25274 priority, priority);
25275 if (priority >= 0 && priority < 0x01000000)
25276 priority >>= 3;
25277 break;
25278 }
25279 }
25280 #endif
25281
25282 if (insn_must_be_first_in_group (insn)
25283 && reload_completed
25284 && current_sched_info->sched_max_insns_priority
25285 && rs6000_sched_restricted_insns_priority)
25286 {
25287
25288 /* Prioritize insns that can be dispatched only in the first
25289 dispatch slot. */
25290 if (rs6000_sched_restricted_insns_priority == 1)
25291 /* Attach highest priority to insn. This means that in
25292 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
25293 precede 'priority' (critical path) considerations. */
25294 return current_sched_info->sched_max_insns_priority;
25295 else if (rs6000_sched_restricted_insns_priority == 2)
25296 /* Increase priority of insn by a minimal amount. This means that in
25297 haifa-sched.c:ready_sort(), only 'priority' (critical path)
25298 considerations precede dispatch-slot restriction considerations. */
25299 return (priority + 1);
25300 }
25301
25302 if (rs6000_tune == PROCESSOR_POWER6
25303 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
25304 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
25305 /* Attach highest priority to insn if the scheduler has just issued two
25306 stores and this instruction is a load, or two loads and this instruction
25307 is a store. Power6 wants loads and stores scheduled alternately
25308 when possible. */
25309 return current_sched_info->sched_max_insns_priority;
25310
25311 return priority;
25312 }
25313
25314 /* Return true if the instruction is nonpipelined on the Cell. */
25315 static bool
25316 is_nonpipeline_insn (rtx_insn *insn)
25317 {
25318 enum attr_type type;
25319 if (!insn || !NONDEBUG_INSN_P (insn)
25320 || GET_CODE (PATTERN (insn)) == USE
25321 || GET_CODE (PATTERN (insn)) == CLOBBER)
25322 return false;
25323
25324 type = get_attr_type (insn);
25325 if (type == TYPE_MUL
25326 || type == TYPE_DIV
25327 || type == TYPE_SDIV
25328 || type == TYPE_DDIV
25329 || type == TYPE_SSQRT
25330 || type == TYPE_DSQRT
25331 || type == TYPE_MFCR
25332 || type == TYPE_MFCRF
25333 || type == TYPE_MFJMPR)
25334 {
25335 return true;
25336 }
25337 return false;
25338 }
25339
25340
25341 /* Return how many instructions the machine can issue per cycle. */
25342
25343 static int
25344 rs6000_issue_rate (void)
25345 {
25346 /* Unless scheduling for register pressure, use issue rate of 1 for
25347 first scheduling pass to decrease degradation. */
25348 if (!reload_completed && !flag_sched_pressure)
25349 return 1;
25350
25351 switch (rs6000_tune) {
25352 case PROCESSOR_RS64A:
25353 case PROCESSOR_PPC601: /* ? */
25354 case PROCESSOR_PPC7450:
25355 return 3;
25356 case PROCESSOR_PPC440:
25357 case PROCESSOR_PPC603:
25358 case PROCESSOR_PPC750:
25359 case PROCESSOR_PPC7400:
25360 case PROCESSOR_PPC8540:
25361 case PROCESSOR_PPC8548:
25362 case PROCESSOR_CELL:
25363 case PROCESSOR_PPCE300C2:
25364 case PROCESSOR_PPCE300C3:
25365 case PROCESSOR_PPCE500MC:
25366 case PROCESSOR_PPCE500MC64:
25367 case PROCESSOR_PPCE5500:
25368 case PROCESSOR_PPCE6500:
25369 case PROCESSOR_TITAN:
25370 return 2;
25371 case PROCESSOR_PPC476:
25372 case PROCESSOR_PPC604:
25373 case PROCESSOR_PPC604e:
25374 case PROCESSOR_PPC620:
25375 case PROCESSOR_PPC630:
25376 return 4;
25377 case PROCESSOR_POWER4:
25378 case PROCESSOR_POWER5:
25379 case PROCESSOR_POWER6:
25380 case PROCESSOR_POWER7:
25381 return 5;
25382 case PROCESSOR_POWER8:
25383 return 7;
25384 case PROCESSOR_POWER9:
25385 case PROCESSOR_FUTURE:
25386 return 6;
25387 default:
25388 return 1;
25389 }
25390 }
25391
25392 /* Return how many instructions to look ahead for better insn
25393 scheduling. */
25394
25395 static int
25396 rs6000_use_sched_lookahead (void)
25397 {
25398 switch (rs6000_tune)
25399 {
25400 case PROCESSOR_PPC8540:
25401 case PROCESSOR_PPC8548:
25402 return 4;
25403
25404 case PROCESSOR_CELL:
25405 return (reload_completed ? 8 : 0);
25406
25407 default:
25408 return 0;
25409 }
25410 }
25411
25412 /* We are choosing insn from the ready queue. Return zero if INSN can be
25413 chosen. */
25414 static int
25415 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
25416 {
25417 if (ready_index == 0)
25418 return 0;
25419
25420 if (rs6000_tune != PROCESSOR_CELL)
25421 return 0;
25422
25423 gcc_assert (insn != NULL_RTX && INSN_P (insn));
25424
25425 if (!reload_completed
25426 || is_nonpipeline_insn (insn)
25427 || is_microcoded_insn (insn))
25428 return 1;
25429
25430 return 0;
25431 }
25432
25433 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
25434 and return true. */
25435
25436 static bool
25437 find_mem_ref (rtx pat, rtx *mem_ref)
25438 {
25439 const char * fmt;
25440 int i, j;
25441
25442 /* stack_tie does not produce any real memory traffic. */
25443 if (tie_operand (pat, VOIDmode))
25444 return false;
25445
25446 if (MEM_P (pat))
25447 {
25448 *mem_ref = pat;
25449 return true;
25450 }
25451
25452 /* Recursively process the pattern. */
25453 fmt = GET_RTX_FORMAT (GET_CODE (pat));
25454
25455 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
25456 {
25457 if (fmt[i] == 'e')
25458 {
25459 if (find_mem_ref (XEXP (pat, i), mem_ref))
25460 return true;
25461 }
25462 else if (fmt[i] == 'E')
25463 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
25464 {
25465 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
25466 return true;
25467 }
25468 }
25469
25470 return false;
25471 }
25472
25473 /* Determine if PAT is a PATTERN of a load insn. */
25474
25475 static bool
25476 is_load_insn1 (rtx pat, rtx *load_mem)
25477 {
25478 if (!pat)
25479 return false;
25480
25481 if (GET_CODE (pat) == SET)
25482 return find_mem_ref (SET_SRC (pat), load_mem);
25483
25484 if (GET_CODE (pat) == PARALLEL)
25485 {
25486 int i;
25487
25488 for (i = 0; i < XVECLEN (pat, 0); i++)
25489 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
25490 return true;
25491 }
25492
25493 return false;
25494 }
25495
25496 /* Determine if INSN loads from memory. */
25497
25498 static bool
25499 is_load_insn (rtx insn, rtx *load_mem)
25500 {
25501 if (!insn || !INSN_P (insn))
25502 return false;
25503
25504 if (CALL_P (insn))
25505 return false;
25506
25507 return is_load_insn1 (PATTERN (insn), load_mem);
25508 }
25509
25510 /* Determine if PAT is a PATTERN of a store insn. */
25511
25512 static bool
25513 is_store_insn1 (rtx pat, rtx *str_mem)
25514 {
25515 if (!pat)
25516 return false;
25517
25518 if (GET_CODE (pat) == SET)
25519 return find_mem_ref (SET_DEST (pat), str_mem);
25520
25521 if (GET_CODE (pat) == PARALLEL)
25522 {
25523 int i;
25524
25525 for (i = 0; i < XVECLEN (pat, 0); i++)
25526 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
25527 return true;
25528 }
25529
25530 return false;
25531 }
25532
25533 /* Determine if INSN stores to memory. */
25534
25535 static bool
25536 is_store_insn (rtx insn, rtx *str_mem)
25537 {
25538 if (!insn || !INSN_P (insn))
25539 return false;
25540
25541 return is_store_insn1 (PATTERN (insn), str_mem);
25542 }
25543
25544 /* Return whether TYPE is a Power9 pairable vector instruction type. */
25545
25546 static bool
25547 is_power9_pairable_vec_type (enum attr_type type)
25548 {
25549 switch (type)
25550 {
25551 case TYPE_VECSIMPLE:
25552 case TYPE_VECCOMPLEX:
25553 case TYPE_VECDIV:
25554 case TYPE_VECCMP:
25555 case TYPE_VECPERM:
25556 case TYPE_VECFLOAT:
25557 case TYPE_VECFDIV:
25558 case TYPE_VECDOUBLE:
25559 return true;
25560 default:
25561 break;
25562 }
25563 return false;
25564 }
25565
25566 /* Returns whether the dependence between INSN and NEXT is considered
25567 costly by the given target. */
25568
25569 static bool
25570 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
25571 {
25572 rtx insn;
25573 rtx next;
25574 rtx load_mem, str_mem;
25575
25576 /* If the flag is not enabled - no dependence is considered costly;
25577 allow all dependent insns in the same group.
25578 This is the most aggressive option. */
25579 if (rs6000_sched_costly_dep == no_dep_costly)
25580 return false;
25581
25582 /* If the flag is set to 1 - a dependence is always considered costly;
25583 do not allow dependent instructions in the same group.
25584 This is the most conservative option. */
25585 if (rs6000_sched_costly_dep == all_deps_costly)
25586 return true;
25587
25588 insn = DEP_PRO (dep);
25589 next = DEP_CON (dep);
25590
25591 if (rs6000_sched_costly_dep == store_to_load_dep_costly
25592 && is_load_insn (next, &load_mem)
25593 && is_store_insn (insn, &str_mem))
25594 /* Prevent load after store in the same group. */
25595 return true;
25596
25597 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
25598 && is_load_insn (next, &load_mem)
25599 && is_store_insn (insn, &str_mem)
25600 && DEP_TYPE (dep) == REG_DEP_TRUE
25601 && mem_locations_overlap (str_mem, load_mem))
25602 /* Prevent load after store in the same group if it is a true
25603 dependence. */
25604 return true;
25605
25606 /* The flag is set to X; dependences with latency >= X are considered costly,
25607 and will not be scheduled in the same group. */
25608 if (rs6000_sched_costly_dep <= max_dep_latency
25609 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
25610 return true;
25611
25612 return false;
25613 }
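
/* For example, -msched-costly-dep=all keeps every pair of dependent insns
   in separate dispatch groups, while a numeric setting such as
   -msched-costly-dep=20 (value hypothetical) only treats a dependence as
   costly when its remaining latency (cost - distance) is at least 20.  */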
25614
25615 /* Return the next insn after INSN that is found before TAIL is reached,
25616 skipping any "non-active" insns - insns that will not actually occupy
25617 an issue slot. Return NULL_RTX if such an insn is not found. */
25618
25619 static rtx_insn *
25620 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
25621 {
25622 if (insn == NULL_RTX || insn == tail)
25623 return NULL;
25624
25625 while (1)
25626 {
25627 insn = NEXT_INSN (insn);
25628 if (insn == NULL_RTX || insn == tail)
25629 return NULL;
25630
25631 if (CALL_P (insn)
25632 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
25633 || (NONJUMP_INSN_P (insn)
25634 && GET_CODE (PATTERN (insn)) != USE
25635 && GET_CODE (PATTERN (insn)) != CLOBBER
25636 && INSN_CODE (insn) != CODE_FOR_stack_tie))
25637 break;
25638 }
25639 return insn;
25640 }
25641
25642 /* Do Power9 specific sched_reorder2 reordering of ready list. */
25643
25644 static int
25645 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
25646 {
25647 int pos;
25648 int i;
25649 rtx_insn *tmp;
25650 enum attr_type type, type2;
25651
25652 type = get_attr_type (last_scheduled_insn);
25653
25654 /* Try to issue fixed point divides back-to-back in pairs so they will be
25655 routed to separate execution units and execute in parallel. */
25656 if (type == TYPE_DIV && divide_cnt == 0)
25657 {
25658 /* First divide has been scheduled. */
25659 divide_cnt = 1;
25660
25661 /* Scan the ready list looking for another divide, if found move it
25662 to the end of the list so it is chosen next. */
25663 pos = lastpos;
25664 while (pos >= 0)
25665 {
25666 if (recog_memoized (ready[pos]) >= 0
25667 && get_attr_type (ready[pos]) == TYPE_DIV)
25668 {
25669 tmp = ready[pos];
25670 for (i = pos; i < lastpos; i++)
25671 ready[i] = ready[i + 1];
25672 ready[lastpos] = tmp;
25673 break;
25674 }
25675 pos--;
25676 }
25677 }
25678 else
25679 {
25680 /* Last insn was the 2nd divide or not a divide, reset the counter. */
25681 divide_cnt = 0;
25682
25683 /* The best dispatch throughput for vector and vector load insns can be
25684 achieved by interleaving a vector and vector load such that they'll
25685 dispatch to the same superslice. If this pairing cannot be achieved
25686 then it is best to pair vector insns together and vector load insns
25687 together.
25688
25689 To aid in this pairing, vec_pairing maintains the current state with
25690 the following values:
25691
25692 0 : Initial state, no vecload/vector pairing has been started.
25693
25694 1 : A vecload or vector insn has been issued and a candidate for
25695 pairing has been found and moved to the end of the ready
25696 list. */
25697 if (type == TYPE_VECLOAD)
25698 {
25699 /* Issued a vecload. */
25700 if (vec_pairing == 0)
25701 {
25702 int vecload_pos = -1;
25703 /* We issued a single vecload, look for a vector insn to pair it
25704 with. If one isn't found, try to pair another vecload. */
25705 pos = lastpos;
25706 while (pos >= 0)
25707 {
25708 if (recog_memoized (ready[pos]) >= 0)
25709 {
25710 type2 = get_attr_type (ready[pos]);
25711 if (is_power9_pairable_vec_type (type2))
25712 {
25713 /* Found a vector insn to pair with, move it to the
25714 end of the ready list so it is scheduled next. */
25715 tmp = ready[pos];
25716 for (i = pos; i < lastpos; i++)
25717 ready[i] = ready[i + 1];
25718 ready[lastpos] = tmp;
25719 vec_pairing = 1;
25720 return cached_can_issue_more;
25721 }
25722 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
25723 /* Remember position of first vecload seen. */
25724 vecload_pos = pos;
25725 }
25726 pos--;
25727 }
25728 if (vecload_pos >= 0)
25729 {
25730 /* Didn't find a vector to pair with but did find a vecload,
25731 move it to the end of the ready list. */
25732 tmp = ready[vecload_pos];
25733 for (i = vecload_pos; i < lastpos; i++)
25734 ready[i] = ready[i + 1];
25735 ready[lastpos] = tmp;
25736 vec_pairing = 1;
25737 return cached_can_issue_more;
25738 }
25739 }
25740 }
25741 else if (is_power9_pairable_vec_type (type))
25742 {
25743 /* Issued a vector operation. */
25744 if (vec_pairing == 0)
25745 {
25746 int vec_pos = -1;
25747 /* We issued a single vector insn, look for a vecload to pair it
25748 with. If one isn't found, try to pair another vector. */
25749 pos = lastpos;
25750 while (pos >= 0)
25751 {
25752 if (recog_memoized (ready[pos]) >= 0)
25753 {
25754 type2 = get_attr_type (ready[pos]);
25755 if (type2 == TYPE_VECLOAD)
25756 {
25757 /* Found a vecload insn to pair with, move it to the
25758 end of the ready list so it is scheduled next. */
25759 tmp = ready[pos];
25760 for (i = pos; i < lastpos; i++)
25761 ready[i] = ready[i + 1];
25762 ready[lastpos] = tmp;
25763 vec_pairing = 1;
25764 return cached_can_issue_more;
25765 }
25766 else if (is_power9_pairable_vec_type (type2)
25767 && vec_pos == -1)
25768 /* Remember position of first vector insn seen. */
25769 vec_pos = pos;
25770 }
25771 pos--;
25772 }
25773 if (vec_pos >= 0)
25774 {
25775 /* Didn't find a vecload to pair with but did find a vector
25776 insn, move it to the end of the ready list. */
25777 tmp = ready[vec_pos];
25778 for (i = vec_pos; i < lastpos; i++)
25779 ready[i] = ready[i + 1];
25780 ready[lastpos] = tmp;
25781 vec_pairing = 1;
25782 return cached_can_issue_more;
25783 }
25784 }
25785 }
25786
25787 /* We've either finished a vec/vecload pair, couldn't find an insn to
25788 continue the current pair, or the last insn had nothing to do
25789 with pairing. In any case, reset the state. */
25790 vec_pairing = 0;
25791 }
25792
25793 return cached_can_issue_more;
25794 }
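
/* Illustrative sketch (not compiled into GCC): each of the moves above is
   the same rotate-to-back operation on the ready array, where the element
   at the highest index is scheduled next.  */
#if 0
static void
move_to_back (rtx_insn **ready, int pos, int lastpos)
{
  rtx_insn *tmp = ready[pos];
  for (int i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;
}
#endif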
25795
25796 /* We are about to begin issuing insns for this clock cycle. */
25797
25798 static int
25799 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
25800 rtx_insn **ready ATTRIBUTE_UNUSED,
25801 int *pn_ready ATTRIBUTE_UNUSED,
25802 int clock_var ATTRIBUTE_UNUSED)
25803 {
25804 int n_ready = *pn_ready;
25805
25806 if (sched_verbose)
25807 fprintf (dump, "// rs6000_sched_reorder :\n");
25808
25809 /* Reorder the ready list, if the second to last ready insn
25810 is a nonpipelined insn. */
25811 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
25812 {
25813 if (is_nonpipeline_insn (ready[n_ready - 1])
25814 && (recog_memoized (ready[n_ready - 2]) > 0))
25815 /* Simply swap first two insns. */
25816 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
25817 }
25818
25819 if (rs6000_tune == PROCESSOR_POWER6)
25820 load_store_pendulum = 0;
25821
25822 return rs6000_issue_rate ();
25823 }
25824
25825 /* Like rs6000_sched_reorder, but called after issuing each insn. */
25826
25827 static int
25828 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
25829 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
25830 {
25831 if (sched_verbose)
25832 fprintf (dump, "// rs6000_sched_reorder2 :\n");
25833
25834 /* For Power6, we need to handle some special cases to try and keep the
25835 store queue from overflowing and triggering expensive flushes.
25836
25837 This code monitors how load and store instructions are being issued
25838 and skews the ready list one way or the other to increase the likelihood
25839 that a desired instruction is issued at the proper time.
25840
25841 A couple of things are done. First, we maintain a "load_store_pendulum"
25842 to track the current state of load/store issue.
25843
25844 - If the pendulum is at zero, then no loads or stores have been
25845 issued in the current cycle so we do nothing.
25846
25847 - If the pendulum is 1, then a single load has been issued in this
25848 cycle and we attempt to locate another load in the ready list to
25849 issue with it.
25850
25851 - If the pendulum is -2, then two stores have already been
25852 issued in this cycle, so we increase the priority of the first load
25853 in the ready list to increase its likelihood of being chosen first
25854 in the next cycle.
25855
25856 - If the pendulum is -1, then a single store has been issued in this
25857 cycle and we attempt to locate another store in the ready list to
25858 issue with it, preferring a store to an adjacent memory location to
25859 facilitate store pairing in the store queue.
25860
25861 - If the pendulum is 2, then two loads have already been
25862 issued in this cycle, so we increase the priority of the first store
25863 in the ready list to increase its likelihood of being chosen first
25864 in the next cycle.
25865
25866 - If the pendulum < -2 or > 2, then do nothing.
25867
25868 Note: This code covers the most common scenarios. There exist
25869 non-load/store instructions which make use of the LSU and which
25870 would need to be accounted for to strictly model the behavior
25871 of the machine. Those instructions are currently unaccounted
25872 for to help minimize compile time overhead of this code.
25873 */
25874 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
25875 {
25876 int pos;
25877 int i;
25878 rtx_insn *tmp;
25879 rtx load_mem, str_mem;
25880
25881 if (is_store_insn (last_scheduled_insn, &str_mem))
25882 /* Issuing a store, swing the load_store_pendulum to the left */
25883 load_store_pendulum--;
25884 else if (is_load_insn (last_scheduled_insn, &load_mem))
25885 /* Issuing a load, swing the load_store_pendulum to the right */
25886 load_store_pendulum++;
25887 else
25888 return cached_can_issue_more;
25889
25890 /* If the pendulum is balanced, or there is only one instruction on
25891 the ready list, then all is well, so return. */
25892 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
25893 return cached_can_issue_more;
25894
25895 if (load_store_pendulum == 1)
25896 {
25897 /* A load has been issued in this cycle. Scan the ready list
25898 for another load to issue with it */
25899 pos = *pn_ready-1;
25900
25901 while (pos >= 0)
25902 {
25903 if (is_load_insn (ready[pos], &load_mem))
25904 {
25905 /* Found a load. Move it to the head of the ready list,
25906 and adjust its priority so that it is more likely to
25907 stay there */
25908 tmp = ready[pos];
25909 for (i=pos; i<*pn_ready-1; i++)
25910 ready[i] = ready[i + 1];
25911 ready[*pn_ready-1] = tmp;
25912
25913 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
25914 INSN_PRIORITY (tmp)++;
25915 break;
25916 }
25917 pos--;
25918 }
25919 }
25920 else if (load_store_pendulum == -2)
25921 {
25922 /* Two stores have been issued in this cycle. Increase the
25923 priority of the first load in the ready list to favor it for
25924 issuing in the next cycle. */
25925 pos = *pn_ready-1;
25926
25927 while (pos >= 0)
25928 {
25929 if (is_load_insn (ready[pos], &load_mem)
25930 && !sel_sched_p ()
25931 && INSN_PRIORITY_KNOWN (ready[pos]))
25932 {
25933 INSN_PRIORITY (ready[pos])++;
25934
25935 /* Adjust the pendulum to account for the fact that a load
25936 was found and increased in priority. This is to prevent
25937 increasing the priority of multiple loads */
25938 load_store_pendulum--;
25939
25940 break;
25941 }
25942 pos--;
25943 }
25944 }
25945 else if (load_store_pendulum == -1)
25946 {
25947 /* A store has been issued in this cycle. Scan the ready list for
25948 another store to issue with it, preferring a store to an adjacent
25949 memory location */
25950 int first_store_pos = -1;
25951
25952 pos = *pn_ready - 1;
25953
25954 while (pos >= 0)
25955 {
25956 if (is_store_insn (ready[pos], &str_mem))
25957 {
25958 rtx str_mem2;
25959 /* Maintain the index of the first store found on the
25960 list */
25961 if (first_store_pos == -1)
25962 first_store_pos = pos;
25963
25964 if (is_store_insn (last_scheduled_insn, &str_mem2)
25965 && adjacent_mem_locations (str_mem, str_mem2))
25966 {
25967 /* Found an adjacent store. Move it to the head of the
25968 ready list, and adjust its priority so that it is
25969 more likely to stay there. */
25970 tmp = ready[pos];
25971 for (i = pos; i < *pn_ready - 1; i++)
25972 ready[i] = ready[i + 1];
25973 ready[*pn_ready - 1] = tmp;
25974
25975 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
25976 INSN_PRIORITY (tmp)++;
25977
25978 first_store_pos = -1;
25979
25980 break;
25981 }
25982 }
25983 pos--;
25984 }
25985
25986 if (first_store_pos >= 0)
25987 {
25988 /* An adjacent store wasn't found, but a non-adjacent store was,
25989 so move the non-adjacent store to the front of the ready
25990 list, and adjust its priority so that it is more likely to
25991 stay there. */
25992 tmp = ready[first_store_pos];
25993 for (i = first_store_pos; i < *pn_ready - 1; i++)
25994 ready[i] = ready[i + 1];
25995 ready[*pn_ready - 1] = tmp;
25996 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
25997 INSN_PRIORITY (tmp)++;
25998 }
25999 }
26000 else if (load_store_pendulum == 2)
26001 {
26002 /* Two loads have been issued in this cycle. Increase the priority
26003 of the first store in the ready list to favor it for issuing in
26004 the next cycle. */
26005 pos = *pn_ready - 1;
26006
26007 while (pos >= 0)
26008 {
26009 if (is_store_insn (ready[pos], &str_mem)
26010 && !sel_sched_p ()
26011 && INSN_PRIORITY_KNOWN (ready[pos]))
26012 {
26013 INSN_PRIORITY (ready[pos])++;
26014
26015 /* Adjust the pendulum to account for the fact that a store
26016 was found and increased in priority. This is to prevent
26017 increasing the priority of multiple stores */
26018 load_store_pendulum++;
26019
26020 break;
26021 }
26022 pos--;
26023 }
26024 }
26025 }
26026
26027 /* Do Power9 dependent reordering if necessary. */
26028 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
26029 && recog_memoized (last_scheduled_insn) >= 0)
26030 return power9_sched_reorder2 (ready, *pn_ready - 1);
26031
26032 return cached_can_issue_more;
26033 }
26034
26035 /* Return whether the presence of INSN causes a dispatch group termination
26036 of group WHICH_GROUP.
26037
26038 If WHICH_GROUP == current_group, this function will return true if INSN
26039 causes the termination of the current group (i.e., the dispatch group to
26040 which INSN belongs). This means that INSN will be the last insn in the
26041 group it belongs to.
26042
26043 If WHICH_GROUP == previous_group, this function will return true if INSN
26044 causes the termination of the previous group (i.e., the dispatch group that
26045 precedes the group to which INSN belongs). This means that INSN will be
26046 the first insn in the group it belongs to. */
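/* For example, on POWER6 a sync (TYPE_SYNC) must be both the first and
   the last insn in its dispatch group, so this function returns true
   for it with either value of WHICH_GROUP. */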
26047
26048 static bool
26049 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
26050 {
26051 bool first, last;
26052
26053 if (! insn)
26054 return false;
26055
26056 first = insn_must_be_first_in_group (insn);
26057 last = insn_must_be_last_in_group (insn);
26058
26059 if (first && last)
26060 return true;
26061
26062 if (which_group == current_group)
26063 return last;
26064 else if (which_group == previous_group)
26065 return first;
26066
26067 return false;
26068 }
26069
26070
26071 static bool
26072 insn_must_be_first_in_group (rtx_insn *insn)
26073 {
26074 enum attr_type type;
26075
26076 if (!insn
26077 || NOTE_P (insn)
26078 || DEBUG_INSN_P (insn)
26079 || GET_CODE (PATTERN (insn)) == USE
26080 || GET_CODE (PATTERN (insn)) == CLOBBER)
26081 return false;
26082
26083 switch (rs6000_tune)
26084 {
26085 case PROCESSOR_POWER5:
26086 if (is_cracked_insn (insn))
26087 return true;
26088 /* FALLTHRU */
26089 case PROCESSOR_POWER4:
26090 if (is_microcoded_insn (insn))
26091 return true;
26092
26093 if (!rs6000_sched_groups)
26094 return false;
26095
26096 type = get_attr_type (insn);
26097
26098 switch (type)
26099 {
26100 case TYPE_MFCR:
26101 case TYPE_MFCRF:
26102 case TYPE_MTCR:
26103 case TYPE_CR_LOGICAL:
26104 case TYPE_MTJMPR:
26105 case TYPE_MFJMPR:
26106 case TYPE_DIV:
26107 case TYPE_LOAD_L:
26108 case TYPE_STORE_C:
26109 case TYPE_ISYNC:
26110 case TYPE_SYNC:
26111 return true;
26112 default:
26113 break;
26114 }
26115 break;
26116 case PROCESSOR_POWER6:
26117 type = get_attr_type (insn);
26118
26119 switch (type)
26120 {
26121 case TYPE_EXTS:
26122 case TYPE_CNTLZ:
26123 case TYPE_TRAP:
26124 case TYPE_MUL:
26125 case TYPE_INSERT:
26126 case TYPE_FPCOMPARE:
26127 case TYPE_MFCR:
26128 case TYPE_MTCR:
26129 case TYPE_MFJMPR:
26130 case TYPE_MTJMPR:
26131 case TYPE_ISYNC:
26132 case TYPE_SYNC:
26133 case TYPE_LOAD_L:
26134 case TYPE_STORE_C:
26135 return true;
26136 case TYPE_SHIFT:
26137 if (get_attr_dot (insn) == DOT_NO
26138 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
26139 return true;
26140 else
26141 break;
26142 case TYPE_DIV:
26143 if (get_attr_size (insn) == SIZE_32)
26144 return true;
26145 else
26146 break;
26147 case TYPE_LOAD:
26148 case TYPE_STORE:
26149 case TYPE_FPLOAD:
26150 case TYPE_FPSTORE:
26151 if (get_attr_update (insn) == UPDATE_YES)
26152 return true;
26153 else
26154 break;
26155 default:
26156 break;
26157 }
26158 break;
26159 case PROCESSOR_POWER7:
26160 type = get_attr_type (insn);
26161
26162 switch (type)
26163 {
26164 case TYPE_CR_LOGICAL:
26165 case TYPE_MFCR:
26166 case TYPE_MFCRF:
26167 case TYPE_MTCR:
26168 case TYPE_DIV:
26169 case TYPE_ISYNC:
26170 case TYPE_LOAD_L:
26171 case TYPE_STORE_C:
26172 case TYPE_MFJMPR:
26173 case TYPE_MTJMPR:
26174 return true;
26175 case TYPE_MUL:
26176 case TYPE_SHIFT:
26177 case TYPE_EXTS:
26178 if (get_attr_dot (insn) == DOT_YES)
26179 return true;
26180 else
26181 break;
26182 case TYPE_LOAD:
26183 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26184 || get_attr_update (insn) == UPDATE_YES)
26185 return true;
26186 else
26187 break;
26188 case TYPE_STORE:
26189 case TYPE_FPLOAD:
26190 case TYPE_FPSTORE:
26191 if (get_attr_update (insn) == UPDATE_YES)
26192 return true;
26193 else
26194 break;
26195 default:
26196 break;
26197 }
26198 break;
26199 case PROCESSOR_POWER8:
26200 type = get_attr_type (insn);
26201
26202 switch (type)
26203 {
26204 case TYPE_CR_LOGICAL:
26205 case TYPE_MFCR:
26206 case TYPE_MFCRF:
26207 case TYPE_MTCR:
26208 case TYPE_SYNC:
26209 case TYPE_ISYNC:
26210 case TYPE_LOAD_L:
26211 case TYPE_STORE_C:
26212 case TYPE_VECSTORE:
26213 case TYPE_MFJMPR:
26214 case TYPE_MTJMPR:
26215 return true;
26216 case TYPE_SHIFT:
26217 case TYPE_EXTS:
26218 case TYPE_MUL:
26219 if (get_attr_dot (insn) == DOT_YES)
26220 return true;
26221 else
26222 break;
26223 case TYPE_LOAD:
26224 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26225 || get_attr_update (insn) == UPDATE_YES)
26226 return true;
26227 else
26228 break;
26229 case TYPE_STORE:
26230 if (get_attr_update (insn) == UPDATE_YES
26231 && get_attr_indexed (insn) == INDEXED_YES)
26232 return true;
26233 else
26234 break;
26235 default:
26236 break;
26237 }
26238 break;
26239 default:
26240 break;
26241 }
26242
26243 return false;
26244 }
26245
26246 static bool
26247 insn_must_be_last_in_group (rtx_insn *insn)
26248 {
26249 enum attr_type type;
26250
26251 if (!insn
26252 || NOTE_P (insn)
26253 || DEBUG_INSN_P (insn)
26254 || GET_CODE (PATTERN (insn)) == USE
26255 || GET_CODE (PATTERN (insn)) == CLOBBER)
26256 return false;
26257
26258 switch (rs6000_tune) {
26259 case PROCESSOR_POWER4:
26260 case PROCESSOR_POWER5:
26261 if (is_microcoded_insn (insn))
26262 return true;
26263
26264 if (is_branch_slot_insn (insn))
26265 return true;
26266
26267 break;
26268 case PROCESSOR_POWER6:
26269 type = get_attr_type (insn);
26270
26271 switch (type)
26272 {
26273 case TYPE_EXTS:
26274 case TYPE_CNTLZ:
26275 case TYPE_TRAP:
26276 case TYPE_MUL:
26277 case TYPE_FPCOMPARE:
26278 case TYPE_MFCR:
26279 case TYPE_MTCR:
26280 case TYPE_MFJMPR:
26281 case TYPE_MTJMPR:
26282 case TYPE_ISYNC:
26283 case TYPE_SYNC:
26284 case TYPE_LOAD_L:
26285 case TYPE_STORE_C:
26286 return true;
26287 case TYPE_SHIFT:
26288 if (get_attr_dot (insn) == DOT_NO
26289 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
26290 return true;
26291 else
26292 break;
26293 case TYPE_DIV:
26294 if (get_attr_size (insn) == SIZE_32)
26295 return true;
26296 else
26297 break;
26298 default:
26299 break;
26300 }
26301 break;
26302 case PROCESSOR_POWER7:
26303 type = get_attr_type (insn);
26304
26305 switch (type)
26306 {
26307 case TYPE_ISYNC:
26308 case TYPE_SYNC:
26309 case TYPE_LOAD_L:
26310 case TYPE_STORE_C:
26311 return true;
26312 case TYPE_LOAD:
26313 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26314 && get_attr_update (insn) == UPDATE_YES)
26315 return true;
26316 else
26317 break;
26318 case TYPE_STORE:
26319 if (get_attr_update (insn) == UPDATE_YES
26320 && get_attr_indexed (insn) == INDEXED_YES)
26321 return true;
26322 else
26323 break;
26324 default:
26325 break;
26326 }
26327 break;
26328 case PROCESSOR_POWER8:
26329 type = get_attr_type (insn);
26330
26331 switch (type)
26332 {
26333 case TYPE_MFCR:
26334 case TYPE_MTCR:
26335 case TYPE_ISYNC:
26336 case TYPE_SYNC:
26337 case TYPE_LOAD_L:
26338 case TYPE_STORE_C:
26339 return true;
26340 case TYPE_LOAD:
26341 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26342 && get_attr_update (insn) == UPDATE_YES)
26343 return true;
26344 else
26345 break;
26346 case TYPE_STORE:
26347 if (get_attr_update (insn) == UPDATE_YES
26348 && get_attr_indexed (insn) == INDEXED_YES)
26349 return true;
26350 else
26351 break;
26352 default:
26353 break;
26354 }
26355 break;
26356 default:
26357 break;
26358 }
26359
26360 return false;
26361 }
26362
26363 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
26364 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
26365
26366 static bool
26367 is_costly_group (rtx *group_insns, rtx next_insn)
26368 {
26369 int i;
26370 int issue_rate = rs6000_issue_rate ();
26371
26372 for (i = 0; i < issue_rate; i++)
26373 {
26374 sd_iterator_def sd_it;
26375 dep_t dep;
26376 rtx insn = group_insns[i];
26377
26378 if (!insn)
26379 continue;
26380
26381 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
26382 {
26383 rtx next = DEP_CON (dep);
26384
26385 if (next == next_insn
26386 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
26387 return true;
26388 }
26389 }
26390
26391 return false;
26392 }
26393
26394 /* Utility of the function redefine_groups.
26395 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
26396 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
26397 to keep it "far" (in a separate group) from GROUP_INSNS, following
26398 one of the following schemes, depending on the value of the flag
26399 -minsert-sched-nops=X:
26400 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
26401 in order to force NEXT_INSN into a separate group.
26402 (2) X < sched_finish_regroup_exact: insert exactly X nops.
26403 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
26404 insertion (has a group just ended, how many vacant issue slots remain in the
26405 last group, and how many dispatch groups were encountered so far). */
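/* For example, with -minsert-sched-nops=2 (scheme 2), exactly two nops
   are emitted before NEXT_INSN whenever its dependence on GROUP_INSNS
   is costly, whether or not that actually pushes it into a new group. */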
26406
26407 static int
26408 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
26409 rtx_insn *next_insn, bool *group_end, int can_issue_more,
26410 int *group_count)
26411 {
26412 rtx nop;
26413 bool force;
26414 int issue_rate = rs6000_issue_rate ();
26415 bool end = *group_end;
26416 int i;
26417
26418 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
26419 return can_issue_more;
26420
26421 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
26422 return can_issue_more;
26423
26424 force = is_costly_group (group_insns, next_insn);
26425 if (!force)
26426 return can_issue_more;
26427
26428 if (sched_verbose > 6)
26429 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
26430 *group_count, can_issue_more);
26431
26432 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
26433 {
26434 if (*group_end)
26435 can_issue_more = 0;
26436
26437 /* Since only a branch can be issued in the last issue_slot, it is
26438 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
26439 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
26440 in this case the last nop will start a new group and the branch
26441 will be forced to the new group. */
26442 if (can_issue_more && !is_branch_slot_insn (next_insn))
26443 can_issue_more--;
26444
26445 /* Do we have a special group ending nop? */
26446 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
26447 || rs6000_tune == PROCESSOR_POWER8)
26448 {
26449 nop = gen_group_ending_nop ();
26450 emit_insn_before (nop, next_insn);
26451 can_issue_more = 0;
26452 }
26453 else
26454 while (can_issue_more > 0)
26455 {
26456 nop = gen_nop ();
26457 emit_insn_before (nop, next_insn);
26458 can_issue_more--;
26459 }
26460
26461 *group_end = true;
26462 return 0;
26463 }
26464
26465 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
26466 {
26467 int n_nops = rs6000_sched_insert_nops;
26468
26469 /* Nops can't be issued from the branch slot, so the effective
26470 issue_rate for nops is 'issue_rate - 1'. */
26471 if (can_issue_more == 0)
26472 can_issue_more = issue_rate;
26473 can_issue_more--;
26474 if (can_issue_more == 0)
26475 {
26476 can_issue_more = issue_rate - 1;
26477 (*group_count)++;
26478 end = true;
26479 for (i = 0; i < issue_rate; i++)
26480 {
26481 group_insns[i] = 0;
26482 }
26483 }
26484
26485 while (n_nops > 0)
26486 {
26487 nop = gen_nop ();
26488 emit_insn_before (nop, next_insn);
26489 if (can_issue_more == issue_rate - 1) /* new group begins */
26490 end = false;
26491 can_issue_more--;
26492 if (can_issue_more == 0)
26493 {
26494 can_issue_more = issue_rate - 1;
26495 (*group_count)++;
26496 end = true;
26497 for (i = 0; i < issue_rate; i++)
26498 {
26499 group_insns[i] = 0;
26500 }
26501 }
26502 n_nops--;
26503 }
26504
26505 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
26506 can_issue_more++;
26507
26508 /* Is next_insn going to start a new group? */
26509 *group_end
26510 = (end
26511 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
26512 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
26513 || (can_issue_more < issue_rate &&
26514 insn_terminates_group_p (next_insn, previous_group)));
26515 if (*group_end && end)
26516 (*group_count)--;
26517
26518 if (sched_verbose > 6)
26519 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
26520 *group_count, can_issue_more);
26521 return can_issue_more;
26522 }
26523
26524 return can_issue_more;
26525 }
26526
26527 /* This function tries to synchronize the dispatch groups that the compiler "sees"
26528 with the dispatch groups that the processor dispatcher is expected to
26529 form in practice. It tries to achieve this synchronization by forcing the
26530 estimated processor grouping on the compiler (as opposed to the function
26531 'pad_groups' which tries to force the scheduler's grouping on the processor).
26532
26533 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
26534 examines the (estimated) dispatch groups that will be formed by the processor
26535 dispatcher. It marks these group boundaries to reflect the estimated
26536 processor grouping, overriding the grouping that the scheduler had marked.
26537 Depending on the value of the flag '-minsert-sched-nops' this function can
26538 force certain insns into separate groups or force a certain distance between
26539 them by inserting nops, for example, if there exists a "costly dependence"
26540 between the insns.
26541
26542 The function estimates the group boundaries that the processor will form as
26543 follows: It keeps track of how many vacant issue slots are available after
26544 each insn. A subsequent insn will start a new group if one of the following
26545 4 cases applies:
26546 - no more vacant issue slots remain in the current dispatch group.
26547 - only the last issue slot, which is the branch slot, is vacant, but the next
26548 insn is not a branch.
26549 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
26550 which means that a cracked insn (which occupies two issue slots) can't be
26551 issued in this group.
26552 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
26553 start a new group. */
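/* For example, if only the last two slots of a group are vacant and the
   next insn is cracked, it cannot be dispatched there (the last slot
   accepts only branches), so it is assumed to start a new group. */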
26554
26555 static int
26556 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
26557 rtx_insn *tail)
26558 {
26559 rtx_insn *insn, *next_insn;
26560 int issue_rate;
26561 int can_issue_more;
26562 int slot, i;
26563 bool group_end;
26564 int group_count = 0;
26565 rtx *group_insns;
26566
26567 /* Initialize. */
26568 issue_rate = rs6000_issue_rate ();
26569 group_insns = XALLOCAVEC (rtx, issue_rate);
26570 for (i = 0; i < issue_rate; i++)
26571 {
26572 group_insns[i] = 0;
26573 }
26574 can_issue_more = issue_rate;
26575 slot = 0;
26576 insn = get_next_active_insn (prev_head_insn, tail);
26577 group_end = false;
26578
26579 while (insn != NULL_RTX)
26580 {
26581 slot = (issue_rate - can_issue_more);
26582 group_insns[slot] = insn;
26583 can_issue_more =
26584 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
26585 if (insn_terminates_group_p (insn, current_group))
26586 can_issue_more = 0;
26587
26588 next_insn = get_next_active_insn (insn, tail);
26589 if (next_insn == NULL_RTX)
26590 return group_count + 1;
26591
26592 /* Is next_insn going to start a new group? */
26593 group_end
26594 = (can_issue_more == 0
26595 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
26596 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
26597 || (can_issue_more < issue_rate &&
26598 insn_terminates_group_p (next_insn, previous_group)));
26599
26600 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
26601 next_insn, &group_end, can_issue_more,
26602 &group_count);
26603
26604 if (group_end)
26605 {
26606 group_count++;
26607 can_issue_more = 0;
26608 for (i = 0; i < issue_rate; i++)
26609 {
26610 group_insns[i] = 0;
26611 }
26612 }
26613
26614 if (GET_MODE (next_insn) == TImode && can_issue_more)
26615 PUT_MODE (next_insn, VOIDmode);
26616 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
26617 PUT_MODE (next_insn, TImode);
26618
26619 insn = next_insn;
26620 if (can_issue_more == 0)
26621 can_issue_more = issue_rate;
26622 } /* while */
26623
26624 return group_count;
26625 }
26626
26627 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
26628 dispatch group boundaries that the scheduler had marked. Pad with nops
26629 any dispatch groups which have vacant issue slots, in order to force the
26630 scheduler's grouping on the processor dispatcher. The function
26631 returns the number of dispatch groups found. */
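/* For example, if the scheduler ended a group while two non-branch issue
   slots were still vacant, and neither neighboring insn forces a boundary
   by itself, two nops are emitted so that the hardware dispatcher closes
   the group at the same point. */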
26632
26633 static int
26634 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
26635 rtx_insn *tail)
26636 {
26637 rtx_insn *insn, *next_insn;
26638 rtx nop;
26639 int issue_rate;
26640 int can_issue_more;
26641 int group_end;
26642 int group_count = 0;
26643
26644 /* Initialize issue_rate. */
26645 issue_rate = rs6000_issue_rate ();
26646 can_issue_more = issue_rate;
26647
26648 insn = get_next_active_insn (prev_head_insn, tail);
26649 next_insn = get_next_active_insn (insn, tail);
26650
26651 while (insn != NULL_RTX)
26652 {
26653 can_issue_more =
26654 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
26655
26656 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
26657
26658 if (next_insn == NULL_RTX)
26659 break;
26660
26661 if (group_end)
26662 {
26663 /* If the scheduler had marked group termination at this location
26664 (between insn and next_insn), and neither insn nor next_insn will
26665 force group termination, pad the group with nops to force group
26666 termination. */
26667 if (can_issue_more
26668 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
26669 && !insn_terminates_group_p (insn, current_group)
26670 && !insn_terminates_group_p (next_insn, previous_group))
26671 {
26672 if (!is_branch_slot_insn (next_insn))
26673 can_issue_more--;
26674
26675 while (can_issue_more)
26676 {
26677 nop = gen_nop ();
26678 emit_insn_before (nop, next_insn);
26679 can_issue_more--;
26680 }
26681 }
26682
26683 can_issue_more = issue_rate;
26684 group_count++;
26685 }
26686
26687 insn = next_insn;
26688 next_insn = get_next_active_insn (insn, tail);
26689 }
26690
26691 return group_count;
26692 }
26693
26694 /* We're beginning a new block. Initialize data structures as necessary. */
26695
26696 static void
26697 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
26698 int sched_verbose ATTRIBUTE_UNUSED,
26699 int max_ready ATTRIBUTE_UNUSED)
26700 {
26701 last_scheduled_insn = NULL;
26702 load_store_pendulum = 0;
26703 divide_cnt = 0;
26704 vec_pairing = 0;
26705 }
26706
26707 /* The following function is called at the end of scheduling BB.
26708 After reload, it inserts nops to enforce insn group bundling. */
26709
26710 static void
26711 rs6000_sched_finish (FILE *dump, int sched_verbose)
26712 {
26713 int n_groups;
26714
26715 if (sched_verbose)
26716 fprintf (dump, "=== Finishing schedule.\n");
26717
26718 if (reload_completed && rs6000_sched_groups)
26719 {
26720 /* Do not run sched_finish hook when selective scheduling enabled. */
26721 if (sel_sched_p ())
26722 return;
26723
26724 if (rs6000_sched_insert_nops == sched_finish_none)
26725 return;
26726
26727 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
26728 n_groups = pad_groups (dump, sched_verbose,
26729 current_sched_info->prev_head,
26730 current_sched_info->next_tail);
26731 else
26732 n_groups = redefine_groups (dump, sched_verbose,
26733 current_sched_info->prev_head,
26734 current_sched_info->next_tail);
26735
26736 if (sched_verbose >= 6)
26737 {
26738 fprintf (dump, "ngroups = %d\n", n_groups);
26739 print_rtl (dump, current_sched_info->prev_head);
26740 fprintf (dump, "Done finish_sched\n");
26741 }
26742 }
26743 }
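
/* Per-context scheduling state, saved and restored by the sched context
   hooks below. */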
26744
26745 struct rs6000_sched_context
26746 {
26747 short cached_can_issue_more;
26748 rtx_insn *last_scheduled_insn;
26749 int load_store_pendulum;
26750 int divide_cnt;
26751 int vec_pairing;
26752 };
26753
26754 typedef struct rs6000_sched_context rs6000_sched_context_def;
26755 typedef rs6000_sched_context_def *rs6000_sched_context_t;
26756
26757 /* Allocate storage for a new scheduling context. */
26758 static void *
26759 rs6000_alloc_sched_context (void)
26760 {
26761 return xmalloc (sizeof (rs6000_sched_context_def));
26762 }
26763
26764 /* If CLEAN_P is true, initialize _SC with clean data;
26765 otherwise initialize it from the current global context. */
26766 static void
26767 rs6000_init_sched_context (void *_sc, bool clean_p)
26768 {
26769 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
26770
26771 if (clean_p)
26772 {
26773 sc->cached_can_issue_more = 0;
26774 sc->last_scheduled_insn = NULL;
26775 sc->load_store_pendulum = 0;
26776 sc->divide_cnt = 0;
26777 sc->vec_pairing = 0;
26778 }
26779 else
26780 {
26781 sc->cached_can_issue_more = cached_can_issue_more;
26782 sc->last_scheduled_insn = last_scheduled_insn;
26783 sc->load_store_pendulum = load_store_pendulum;
26784 sc->divide_cnt = divide_cnt;
26785 sc->vec_pairing = vec_pairing;
26786 }
26787 }
26788
26789 /* Set the global scheduling context to the one pointed to by _SC. */
26790 static void
26791 rs6000_set_sched_context (void *_sc)
26792 {
26793 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
26794
26795 gcc_assert (sc != NULL);
26796
26797 cached_can_issue_more = sc->cached_can_issue_more;
26798 last_scheduled_insn = sc->last_scheduled_insn;
26799 load_store_pendulum = sc->load_store_pendulum;
26800 divide_cnt = sc->divide_cnt;
26801 vec_pairing = sc->vec_pairing;
26802 }
26803
26804 /* Free _SC. */
26805 static void
26806 rs6000_free_sched_context (void *_sc)
26807 {
26808 gcc_assert (_sc != NULL);
26809
26810 free (_sc);
26811 }
26812
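/* Implement TARGET_SCHED_CAN_SPECULATE_INSN: do not let the scheduler
   speculate divide and square-root insns, presumably because of their
   long and often variable latency. */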
26813 static bool
26814 rs6000_sched_can_speculate_insn (rtx_insn *insn)
26815 {
26816 switch (get_attr_type (insn))
26817 {
26818 case TYPE_DIV:
26819 case TYPE_SDIV:
26820 case TYPE_DDIV:
26821 case TYPE_VECDIV:
26822 case TYPE_SSQRT:
26823 case TYPE_DSQRT:
26824 return false;
26825
26826 default:
26827 return true;
26828 }
26829 }
26830 \f
26831 /* Length in units of the trampoline for entering a nested function. */
26832
26833 int
26834 rs6000_trampoline_size (void)
26835 {
26836 int ret = 0;
26837
26838 switch (DEFAULT_ABI)
26839 {
26840 default:
26841 gcc_unreachable ();
26842
26843 case ABI_AIX:
26844 ret = (TARGET_32BIT) ? 12 : 24;
26845 break;
26846
26847 case ABI_ELFv2:
26848 gcc_assert (!TARGET_32BIT);
26849 ret = 32;
26850 break;
26851
26852 case ABI_DARWIN:
26853 case ABI_V4:
26854 ret = (TARGET_32BIT) ? 40 : 48;
26855 break;
26856 }
26857
26858 return ret;
26859 }
26860
26861 /* Emit RTL insns to initialize the variable parts of a trampoline.
26862 FNADDR is an RTX for the address of the function's pure code.
26863 CXT is an RTX for the static chain value for the function. */
26864
26865 static void
26866 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
26867 {
26868 int regsize = (TARGET_32BIT) ? 4 : 8;
26869 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
26870 rtx ctx_reg = force_reg (Pmode, cxt);
26871 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
26872
26873 switch (DEFAULT_ABI)
26874 {
26875 default:
26876 gcc_unreachable ();
26877
26878 /* Under AIX, just build the 3-word function descriptor. */
26879 case ABI_AIX:
26880 {
26881 rtx fnmem, fn_reg, toc_reg;
26882
26883 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
26884 error ("you cannot take the address of a nested function if you use "
26885 "the %qs option", "-mno-pointers-to-nested-functions");
26886
26887 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
26888 fn_reg = gen_reg_rtx (Pmode);
26889 toc_reg = gen_reg_rtx (Pmode);
26890
26891 /* Macro to shorten the code expansions below. */
26892 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
26893
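/* Copy the target function's entry address and TOC pointer out of its
   descriptor, then write the trampoline's own descriptor: entry
   address, TOC pointer, and static chain, one word each. */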
26894 m_tramp = replace_equiv_address (m_tramp, addr);
26895
26896 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
26897 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
26898 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
26899 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
26900 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
26901
26902 # undef MEM_PLUS
26903 }
26904 break;
26905
26906 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
26907 case ABI_ELFv2:
26908 case ABI_DARWIN:
26909 case ABI_V4:
26910 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
26911 LCT_NORMAL, VOIDmode,
26912 addr, Pmode,
26913 GEN_INT (rs6000_trampoline_size ()), SImode,
26914 fnaddr, Pmode,
26915 ctx_reg, Pmode);
26916 break;
26917 }
26918 }
26919
26920 \f
26921 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
26922 identifier as an argument, so the front end shouldn't look it up. */
26923
26924 static bool
26925 rs6000_attribute_takes_identifier_p (const_tree attr_id)
26926 {
26927 return is_attribute_p ("altivec", attr_id);
26928 }
26929
26930 /* Handle the "altivec" attribute. The attribute may have
26931 arguments as follows:
26932
26933 __attribute__((altivec(vector__)))
26934 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
26935 __attribute__((altivec(bool__))) (always followed by 'unsigned')
26936
26937 and may appear more than once (e.g., 'vector bool char') in a
26938 given declaration. */
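/* For example, '__attribute__((altivec(vector__))) unsigned int v;'
   reaches the handler below with MODE == SImode and TYPE_UNSIGNED set,
   so V is given the type unsigned_V4SI_type_node. */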
26939
26940 static tree
26941 rs6000_handle_altivec_attribute (tree *node,
26942 tree name ATTRIBUTE_UNUSED,
26943 tree args,
26944 int flags ATTRIBUTE_UNUSED,
26945 bool *no_add_attrs)
26946 {
26947 tree type = *node, result = NULL_TREE;
26948 machine_mode mode;
26949 int unsigned_p;
26950 char altivec_type
26951 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
26952 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
26953 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
26954 : '?');
26955
26956 while (POINTER_TYPE_P (type)
26957 || TREE_CODE (type) == FUNCTION_TYPE
26958 || TREE_CODE (type) == METHOD_TYPE
26959 || TREE_CODE (type) == ARRAY_TYPE)
26960 type = TREE_TYPE (type);
26961
26962 mode = TYPE_MODE (type);
26963
26964 /* Check for invalid AltiVec type qualifiers. */
26965 if (type == long_double_type_node)
26966 error ("use of %<long double%> in AltiVec types is invalid");
26967 else if (type == boolean_type_node)
26968 error ("use of boolean types in AltiVec types is invalid");
26969 else if (TREE_CODE (type) == COMPLEX_TYPE)
26970 error ("use of %<complex%> in AltiVec types is invalid");
26971 else if (DECIMAL_FLOAT_MODE_P (mode))
26972 error ("use of decimal floating point types in AltiVec types is invalid");
26973 else if (!TARGET_VSX)
26974 {
26975 if (type == long_unsigned_type_node || type == long_integer_type_node)
26976 {
26977 if (TARGET_64BIT)
26978 error ("use of %<long%> in AltiVec types is invalid for "
26979 "64-bit code without %qs", "-mvsx");
26980 else if (rs6000_warn_altivec_long)
26981 warning (0, "use of %<long%> in AltiVec types is deprecated; "
26982 "use %<int%>");
26983 }
26984 else if (type == long_long_unsigned_type_node
26985 || type == long_long_integer_type_node)
26986 error ("use of %<long long%> in AltiVec types is invalid without %qs",
26987 "-mvsx");
26988 else if (type == double_type_node)
26989 error ("use of %<double%> in AltiVec types is invalid without %qs",
26990 "-mvsx");
26991 }
26992
26993 switch (altivec_type)
26994 {
26995 case 'v':
26996 unsigned_p = TYPE_UNSIGNED (type);
26997 switch (mode)
26998 {
26999 case E_TImode:
27000 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
27001 break;
27002 case E_DImode:
27003 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
27004 break;
27005 case E_SImode:
27006 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
27007 break;
27008 case E_HImode:
27009 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
27010 break;
27011 case E_QImode:
27012 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
27013 break;
27014 case E_SFmode: result = V4SF_type_node; break;
27015 case E_DFmode: result = V2DF_type_node; break;
27016 /* If the user says 'vector int bool', we may be handed the 'bool'
27017 attribute _before_ the 'vector' attribute, and so select the
27018 proper type in the 'b' case below. */
27019 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
27020 case E_V2DImode: case E_V2DFmode:
27021 result = type;
27022 default: break;
27023 }
27024 break;
27025 case 'b':
27026 switch (mode)
27027 {
27028 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
27029 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
27030 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
27031 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
27032 default: break;
27033 }
27034 break;
27035 case 'p':
27036 switch (mode)
27037 {
27038 case E_V8HImode: result = pixel_V8HI_type_node;
27039 default: break;
27040 }
27041 default: break;
27042 }
27043
27044 /* Propagate qualifiers attached to the element type
27045 onto the vector type. */
27046 if (result && result != type && TYPE_QUALS (type))
27047 result = build_qualified_type (result, TYPE_QUALS (type));
27048
27049 *no_add_attrs = true; /* No need to hang on to the attribute. */
27050
27051 if (result)
27052 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
27053
27054 return NULL_TREE;
27055 }
27056
27057 /* AltiVec defines five built-in scalar types that serve as vector
27058 elements; we must teach the compiler how to mangle them. The 128-bit
27059 floating point mangling is target-specific as well. */
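/* For example, the element type of 'vector bool int' mangles as
   "U6__booli", so the vector type itself mangles as something like
   "Dv4_U6__booli" under the vendor-extended Itanium ABI rules. */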
27060
27061 static const char *
27062 rs6000_mangle_type (const_tree type)
27063 {
27064 type = TYPE_MAIN_VARIANT (type);
27065
27066 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
27067 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
27068 return NULL;
27069
27070 if (type == bool_char_type_node) return "U6__boolc";
27071 if (type == bool_short_type_node) return "U6__bools";
27072 if (type == pixel_type_node) return "u7__pixel";
27073 if (type == bool_int_type_node) return "U6__booli";
27074 if (type == bool_long_long_type_node) return "U6__boolx";
27075
27076 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
27077 return "g";
27078 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
27079 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
27080
27081 /* For all other types, use the default mangling. */
27082 return NULL;
27083 }
27084
27085 /* Handle a "longcall" or "shortcall" attribute; arguments as in
27086 struct attribute_spec.handler. */
27087
27088 static tree
27089 rs6000_handle_longcall_attribute (tree *node, tree name,
27090 tree args ATTRIBUTE_UNUSED,
27091 int flags ATTRIBUTE_UNUSED,
27092 bool *no_add_attrs)
27093 {
27094 if (TREE_CODE (*node) != FUNCTION_TYPE
27095 && TREE_CODE (*node) != FIELD_DECL
27096 && TREE_CODE (*node) != TYPE_DECL)
27097 {
27098 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27099 name);
27100 *no_add_attrs = true;
27101 }
27102
27103 return NULL_TREE;
27104 }
27105
27106 /* Set longcall attributes on all functions declared when
27107 rs6000_default_long_calls is true. */
27108 static void
27109 rs6000_set_default_type_attributes (tree type)
27110 {
27111 if (rs6000_default_long_calls
27112 && (TREE_CODE (type) == FUNCTION_TYPE
27113 || TREE_CODE (type) == METHOD_TYPE))
27114 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
27115 NULL_TREE,
27116 TYPE_ATTRIBUTES (type));
27117
27118 #if TARGET_MACHO
27119 darwin_set_default_type_attributes (type);
27120 #endif
27121 }
27122
27123 /* Return a reference suitable for calling a function with the
27124 longcall attribute. */
27125
27126 static rtx
27127 rs6000_longcall_ref (rtx call_ref, rtx arg)
27128 {
27129 /* System V adds '.' to the internal name, so skip any leading dots. */
27130 const char *call_name = XSTR (call_ref, 0);
27131 if (*call_name == '.')
27132 {
27133 while (*call_name == '.')
27134 call_name++;
27135
27136 tree node = get_identifier (call_name);
27137 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
27138 }
27139
27140 if (TARGET_PLTSEQ)
27141 {
27142 rtx base = const0_rtx;
27143 int regno = 12;
27144 if (rs6000_pcrel_p (cfun))
27145 {
27146 rtx reg = gen_rtx_REG (Pmode, regno);
27147 rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
27148 UNSPEC_PLT_PCREL);
27149 emit_insn (gen_rtx_SET (reg, u));
27150 return reg;
27151 }
27152
27153 if (DEFAULT_ABI == ABI_ELFv2)
27154 base = gen_rtx_REG (Pmode, TOC_REGISTER);
27155 else
27156 {
27157 if (flag_pic)
27158 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
27159 regno = 11;
27160 }
27161 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
27162 may be used by a function global entry point. For SysV4, r11
27163 is used by __glink_PLTresolve lazy resolver entry. */
27164 rtx reg = gen_rtx_REG (Pmode, regno);
27165 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
27166 UNSPEC_PLT16_HA);
27167 rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
27168 UNSPEC_PLT16_LO);
27169 emit_insn (gen_rtx_SET (reg, hi));
27170 emit_insn (gen_rtx_SET (reg, lo));
27171 return reg;
27172 }
27173
27174 return force_reg (Pmode, call_ref);
27175 }
27176 \f
27177 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
27178 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
27179 #endif
27180
27181 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27182 struct attribute_spec.handler. */
27183 static tree
27184 rs6000_handle_struct_attribute (tree *node, tree name,
27185 tree args ATTRIBUTE_UNUSED,
27186 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27187 {
27188 tree *type = NULL;
27189 if (DECL_P (*node))
27190 {
27191 if (TREE_CODE (*node) == TYPE_DECL)
27192 type = &TREE_TYPE (*node);
27193 }
27194 else
27195 type = node;
27196
27197 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27198 || TREE_CODE (*type) == UNION_TYPE)))
27199 {
27200 warning (OPT_Wattributes, "%qE attribute ignored", name);
27201 *no_add_attrs = true;
27202 }
27203
27204 else if ((is_attribute_p ("ms_struct", name)
27205 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27206 || ((is_attribute_p ("gcc_struct", name)
27207 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27208 {
27209 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27210 name);
27211 *no_add_attrs = true;
27212 }
27213
27214 return NULL_TREE;
27215 }
27216
27217 static bool
27218 rs6000_ms_bitfield_layout_p (const_tree record_type)
27219 {
27220 return ((TARGET_USE_MS_BITFIELD_LAYOUT
27221 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27222 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27223 }
27224 \f
27225 #ifdef USING_ELFOS_H
27226
27227 /* A get_unnamed_section callback, used for switching to toc_section. */
27228
27229 static void
27230 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
27231 {
27232 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27233 && TARGET_MINIMAL_TOC)
27234 {
27235 if (!toc_initialized)
27236 {
27237 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
27238 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27239 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
27240 fprintf (asm_out_file, "\t.tc ");
27241 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
27242 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
27243 fprintf (asm_out_file, "\n");
27244
27245 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27246 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27247 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
27248 fprintf (asm_out_file, " = .+32768\n");
27249 toc_initialized = 1;
27250 }
27251 else
27252 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27253 }
27254 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27255 {
27256 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
27257 if (!toc_initialized)
27258 {
27259 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27260 toc_initialized = 1;
27261 }
27262 }
27263 else
27264 {
27265 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27266 if (!toc_initialized)
27267 {
27268 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27269 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
27270 fprintf (asm_out_file, " = .+32768\n");
27271 toc_initialized = 1;
27272 }
27273 }
27274 }
27275
27276 /* Implement TARGET_ASM_INIT_SECTIONS. */
27277
27278 static void
27279 rs6000_elf_asm_init_sections (void)
27280 {
27281 toc_section
27282 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
27283
27284 sdata2_section
27285 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
27286 SDATA2_SECTION_ASM_OP);
27287 }
27288
27289 /* Implement TARGET_SELECT_RTX_SECTION. */
27290
27291 static section *
27292 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
27293 unsigned HOST_WIDE_INT align)
27294 {
27295 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
27296 return toc_section;
27297 else
27298 return default_elf_select_rtx_section (mode, x, align);
27299 }
27300 \f
27301 /* For a SYMBOL_REF, set generic flags and then perform some
27302 target-specific processing.
27303
27304 When the AIX ABI is requested on a non-AIX system, replace the
27305 function name with the real name (with a leading .) rather than the
27306 function descriptor name. This saves a lot of overriding code to
27307 read the prefixes. */
27308
27309 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
27310 static void
27311 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
27312 {
27313 default_encode_section_info (decl, rtl, first);
27314
27315 if (first
27316 && TREE_CODE (decl) == FUNCTION_DECL
27317 && !TARGET_AIX
27318 && DEFAULT_ABI == ABI_AIX)
27319 {
27320 rtx sym_ref = XEXP (rtl, 0);
27321 size_t len = strlen (XSTR (sym_ref, 0));
27322 char *str = XALLOCAVEC (char, len + 2);
27323 str[0] = '.';
27324 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
27325 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
27326 }
27327 }
27328
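/* Return true if SECTION equals TEMPL, or is TEMPL followed by a
   '.'-separated suffix; e.g. both ".sdata" and ".sdata.foo" match the
   template ".sdata". */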
27329 static inline bool
27330 compare_section_name (const char *section, const char *templ)
27331 {
27332 int len;
27333
27334 len = strlen (templ);
27335 return (strncmp (section, templ, len) == 0
27336 && (section[len] == 0 || section[len] == '.'));
27337 }
27338
27339 bool
27340 rs6000_elf_in_small_data_p (const_tree decl)
27341 {
27342 if (rs6000_sdata == SDATA_NONE)
27343 return false;
27344
27345 /* We want to merge strings, so we never consider them small data. */
27346 if (TREE_CODE (decl) == STRING_CST)
27347 return false;
27348
27349 /* Functions are never in the small data area. */
27350 if (TREE_CODE (decl) == FUNCTION_DECL)
27351 return false;
27352
27353 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
27354 {
27355 const char *section = DECL_SECTION_NAME (decl);
27356 if (compare_section_name (section, ".sdata")
27357 || compare_section_name (section, ".sdata2")
27358 || compare_section_name (section, ".gnu.linkonce.s")
27359 || compare_section_name (section, ".sbss")
27360 || compare_section_name (section, ".sbss2")
27361 || compare_section_name (section, ".gnu.linkonce.sb")
27362 || strcmp (section, ".PPC.EMB.sdata0") == 0
27363 || strcmp (section, ".PPC.EMB.sbss0") == 0)
27364 return true;
27365 }
27366 else
27367 {
27368 /* If we are told not to put readonly data in sdata, then don't. */
27369 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
27370 && !rs6000_readonly_in_sdata)
27371 return false;
27372
27373 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
27374
27375 if (size > 0
27376 && size <= g_switch_value
27377 /* If it's not public, and we're not going to reference it there,
27378 there's no need to put it in the small data section. */
27379 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
27380 return true;
27381 }
27382
27383 return false;
27384 }
27385
27386 #endif /* USING_ELFOS_H */
27387 \f
27388 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
27389
27390 static bool
27391 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
27392 {
27393 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
27394 }
27395
27396 /* Do not place thread-local symbol refs in the object blocks. */
27397
27398 static bool
27399 rs6000_use_blocks_for_decl_p (const_tree decl)
27400 {
27401 return !DECL_THREAD_LOCAL_P (decl);
27402 }
27403 \f
27404 /* Return a REG that occurs in ADDR with coefficient 1.
27405 ADDR can be effectively incremented by incrementing REG.
27406
27407 r0 is special and we must not select it as an address
27408 register by this routine since our caller will try to
27409 increment the returned register via an "la" instruction. */
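/* For example, for ADDR == (plus (reg r9) (const_int 16)) this returns
   the rtx for r9. */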
27410
27411 rtx
27412 find_addr_reg (rtx addr)
27413 {
27414 while (GET_CODE (addr) == PLUS)
27415 {
27416 if (REG_P (XEXP (addr, 0))
27417 && REGNO (XEXP (addr, 0)) != 0)
27418 addr = XEXP (addr, 0);
27419 else if (REG_P (XEXP (addr, 1))
27420 && REGNO (XEXP (addr, 1)) != 0)
27421 addr = XEXP (addr, 1);
27422 else if (CONSTANT_P (XEXP (addr, 0)))
27423 addr = XEXP (addr, 1);
27424 else if (CONSTANT_P (XEXP (addr, 1)))
27425 addr = XEXP (addr, 0);
27426 else
27427 gcc_unreachable ();
27428 }
27429 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
27430 return addr;
27431 }
27432
27433 void
27434 rs6000_fatal_bad_address (rtx op)
27435 {
27436 fatal_insn ("bad address", op);
27437 }
27438
27439 #if TARGET_MACHO
27440
27441 typedef struct branch_island_d {
27442 tree function_name;
27443 tree label_name;
27444 int line_number;
27445 } branch_island;
27446
27447
27448 static vec<branch_island, va_gc> *branch_islands;
27449
27450 /* Remember to generate a branch island for far calls to the given
27451 function. */
27452
27453 static void
27454 add_compiler_branch_island (tree label_name, tree function_name,
27455 int line_number)
27456 {
27457 branch_island bi = {function_name, label_name, line_number};
27458 vec_safe_push (branch_islands, bi);
27459 }
27460
27461 /* Generate far-jump branch islands for everything recorded in
27462 branch_islands. Invoked immediately after the last instruction of
27463 the epilogue has been emitted; the branch islands must be appended
27464 to, and contiguous with, the function body. Mach-O stubs are
27465 generated in machopic_output_stub(). */
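/* For example, a non-PIC island is a short absolute jump of the form
   (label and symbol names illustrative):
       L42$island: lis r12,hi16(_foo)
                   ori r12,r12,lo16(_foo)
                   mtctr r12
                   bctr  */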
27466
27467 static void
27468 macho_branch_islands (void)
27469 {
27470 char tmp_buf[512];
27471
27472 while (!vec_safe_is_empty (branch_islands))
27473 {
27474 branch_island *bi = &branch_islands->last ();
27475 const char *label = IDENTIFIER_POINTER (bi->label_name);
27476 const char *name = IDENTIFIER_POINTER (bi->function_name);
27477 char name_buf[512];
27478 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
27479 if (name[0] == '*' || name[0] == '&')
27480 strcpy (name_buf, name+1);
27481 else
27482 {
27483 name_buf[0] = '_';
27484 strcpy (name_buf+1, name);
27485 }
27486 strcpy (tmp_buf, "\n");
27487 strcat (tmp_buf, label);
27488 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
27489 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
27490 dbxout_stabd (N_SLINE, bi->line_number);
27491 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
27492 if (flag_pic)
27493 {
27494 if (TARGET_LINK_STACK)
27495 {
27496 char name[32];
27497 get_ppc476_thunk_name (name);
27498 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
27499 strcat (tmp_buf, name);
27500 strcat (tmp_buf, "\n");
27501 strcat (tmp_buf, label);
27502 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
27503 }
27504 else
27505 {
27506 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
27507 strcat (tmp_buf, label);
27508 strcat (tmp_buf, "_pic\n");
27509 strcat (tmp_buf, label);
27510 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
27511 }
27512
27513 strcat (tmp_buf, "\taddis r11,r11,ha16(");
27514 strcat (tmp_buf, name_buf);
27515 strcat (tmp_buf, " - ");
27516 strcat (tmp_buf, label);
27517 strcat (tmp_buf, "_pic)\n");
27518
27519 strcat (tmp_buf, "\tmtlr r0\n");
27520
27521 strcat (tmp_buf, "\taddi r12,r11,lo16(");
27522 strcat (tmp_buf, name_buf);
27523 strcat (tmp_buf, " - ");
27524 strcat (tmp_buf, label);
27525 strcat (tmp_buf, "_pic)\n");
27526
27527 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
27528 }
27529 else
27530 {
27531 strcat (tmp_buf, ":\n\tlis r12,hi16(");
27532 strcat (tmp_buf, name_buf);
27533 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
27534 strcat (tmp_buf, name_buf);
27535 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
27536 }
27537 output_asm_insn (tmp_buf, 0);
27538 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
27539 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
27540 dbxout_stabd (N_SLINE, bi->line_number);
27541 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
27542 branch_islands->pop ();
27543 }
27544 }
27545
27546 /* no_previous_def returns nonzero if no branch island has yet been
27547 recorded for FUNCTION_NAME. */
27548
27549 static int
27550 no_previous_def (tree function_name)
27551 {
27552 branch_island *bi;
27553 unsigned ix;
27554
27555 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
27556 if (function_name == bi->function_name)
27557 return 0;
27558 return 1;
27559 }
27560
27561 /* get_prev_label returns the label name recorded for a previous branch
27562 island to FUNCTION_NAME, or NULL_TREE if there is none. */
27563
27564 static tree
27565 get_prev_label (tree function_name)
27566 {
27567 branch_island *bi;
27568 unsigned ix;
27569
27570 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
27571 if (function_name == bi->function_name)
27572 return bi->label_name;
27573 return NULL_TREE;
27574 }
27575
27576 /* Generate PIC and indirect symbol stubs. */
27577
27578 void
27579 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27580 {
27581 unsigned int length;
27582 char *symbol_name, *lazy_ptr_name;
27583 char *local_label_0;
27584 static unsigned label = 0;
27585
27586 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27587 symb = (*targetm.strip_name_encoding) (symb);
27588
27590 length = strlen (symb);
27591 symbol_name = XALLOCAVEC (char, length + 32);
27592 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27593
27594 lazy_ptr_name = XALLOCAVEC (char, length + 32);
27595 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
27596
27597 if (flag_pic == 2)
27598 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
27599 else
27600 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
27601
27602 if (flag_pic == 2)
27603 {
27604 fprintf (file, "\t.align 5\n");
27605
27606 fprintf (file, "%s:\n", stub);
27607 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27608
27609 label++;
27610 local_label_0 = XALLOCAVEC (char, 16);
27611 sprintf (local_label_0, "L%u$spb", label);
27612
27613 fprintf (file, "\tmflr r0\n");
27614 if (TARGET_LINK_STACK)
27615 {
27616 char name[32];
27617 get_ppc476_thunk_name (name);
27618 fprintf (file, "\tbl %s\n", name);
27619 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
27620 }
27621 else
27622 {
27623 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
27624 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
27625 }
27626 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
27627 lazy_ptr_name, local_label_0);
27628 fprintf (file, "\tmtlr r0\n");
27629 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
27630 (TARGET_64BIT ? "ldu" : "lwzu"),
27631 lazy_ptr_name, local_label_0);
27632 fprintf (file, "\tmtctr r12\n");
27633 fprintf (file, "\tbctr\n");
27634 }
27635 else
27636 {
27637 fprintf (file, "\t.align 4\n");
27638
27639 fprintf (file, "%s:\n", stub);
27640 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27641
27642 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
27643 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
27644 (TARGET_64BIT ? "ldu" : "lwzu"),
27645 lazy_ptr_name);
27646 fprintf (file, "\tmtctr r12\n");
27647 fprintf (file, "\tbctr\n");
27648 }
27649
27650 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27651 fprintf (file, "%s:\n", lazy_ptr_name);
27652 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27653 fprintf (file, "%sdyld_stub_binding_helper\n",
27654 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
27655 }
27656
27657 /* Legitimize PIC addresses. If the address is already
27658 position-independent, we return ORIG. Newly generated
27659 position-independent addresses go into a reg. This is REG if
27660 nonzero; otherwise we allocate register(s) as necessary. */
27661
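/* True if X is a CONST_INT whose value fits in a signed 16-bit
   immediate field (e.g. the displacement of an addi). */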
27662 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
27663
27664 rtx
27665 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
27666 rtx reg)
27667 {
27668 rtx base, offset;
27669
27670 if (reg == NULL && !reload_completed)
27671 reg = gen_reg_rtx (Pmode);
27672
27673 if (GET_CODE (orig) == CONST)
27674 {
27675 rtx reg_temp;
27676
27677 if (GET_CODE (XEXP (orig, 0)) == PLUS
27678 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
27679 return orig;
27680
27681 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
27682
27683 /* Use a different reg for the intermediate value, as
27684 it will be marked UNCHANGING. */
27685 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
27686 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
27687 Pmode, reg_temp);
27688 offset =
27689 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
27690 Pmode, reg);
27691
27692 if (CONST_INT_P (offset))
27693 {
27694 if (SMALL_INT (offset))
27695 return plus_constant (Pmode, base, INTVAL (offset));
27696 else if (!reload_completed)
27697 offset = force_reg (Pmode, offset);
27698 else
27699 {
27700 rtx mem = force_const_mem (Pmode, orig);
27701 return machopic_legitimize_pic_address (mem, Pmode, reg);
27702 }
27703 }
27704 return gen_rtx_PLUS (Pmode, base, offset);
27705 }
27706
27707 /* Fall back on generic machopic code. */
27708 return machopic_legitimize_pic_address (orig, mode, reg);
27709 }
27710
27711 /* Output a .machine directive for the Darwin assembler, and call
27712 the generic start_file routine. */
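/* For example, compiling with -mcpu=G5 produces "\t.machine ppc970"
   via the mapping table below. */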
27713
27714 static void
27715 rs6000_darwin_file_start (void)
27716 {
27717 static const struct
27718 {
27719 const char *arg;
27720 const char *name;
27721 HOST_WIDE_INT if_set;
27722 } mapping[] = {
27723 { "ppc64", "ppc64", MASK_64BIT },
27724 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
27725 { "power4", "ppc970", 0 },
27726 { "G5", "ppc970", 0 },
27727 { "7450", "ppc7450", 0 },
27728 { "7400", "ppc7400", MASK_ALTIVEC },
27729 { "G4", "ppc7400", 0 },
27730 { "750", "ppc750", 0 },
27731 { "740", "ppc750", 0 },
27732 { "G3", "ppc750", 0 },
27733 { "604e", "ppc604e", 0 },
27734 { "604", "ppc604", 0 },
27735 { "603e", "ppc603", 0 },
27736 { "603", "ppc603", 0 },
27737 { "601", "ppc601", 0 },
27738 { NULL, "ppc", 0 } };
27739 const char *cpu_id = "";
27740 size_t i;
27741
27742 rs6000_file_start ();
27743 darwin_file_start ();
27744
27745 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
27746
27747 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
27748 cpu_id = rs6000_default_cpu;
27749
27750 if (global_options_set.x_rs6000_cpu_index)
27751 cpu_id = processor_target_table[rs6000_cpu_index].name;
27752
27753 /* Look through the mapping array. Pick the first name that either
27754 matches the argument, has a bit set in IF_SET that is also set
27755 in the target flags, or has a NULL name. */
27756
27757 i = 0;
27758 while (mapping[i].arg != NULL
27759 && strcmp (mapping[i].arg, cpu_id) != 0
27760 && (mapping[i].if_set & rs6000_isa_flags) == 0)
27761 i++;
27762
27763 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
27764 }
27765
27766 #endif /* TARGET_MACHO */
27767
27768 #if TARGET_ELF
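/* Implement TARGET_ASM_RELOC_RW_MASK. */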
27769 static int
27770 rs6000_elf_reloc_rw_mask (void)
27771 {
27772 if (flag_pic)
27773 return 3;
27774 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27775 return 2;
27776 else
27777 return 0;
27778 }
27779
27780 /* Record an element in the table of global constructors. SYMBOL is
27781 a SYMBOL_REF of the function to be called; PRIORITY is a number
27782 between 0 and MAX_INIT_PRIORITY.
27783
27784 This differs from default_named_section_asm_out_constructor in
27785 that we have special handling for -mrelocatable. */
27786
27787 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
27788 static void
27789 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
27790 {
27791 const char *section = ".ctors";
27792 char buf[18];
27793
27794 if (priority != DEFAULT_INIT_PRIORITY)
27795 {
27796 sprintf (buf, ".ctors.%.5u",
27797 /* Invert the numbering so the linker puts us in the proper
27798 order; constructors are run from right to left, and the
27799 linker sorts in increasing order. */
27800 MAX_INIT_PRIORITY - priority);
27801 section = buf;
27802 }
27803
27804 switch_to_section (get_section (section, SECTION_WRITE, NULL));
27805 assemble_align (POINTER_SIZE);
27806
27807 if (DEFAULT_ABI == ABI_V4
27808 && (TARGET_RELOCATABLE || flag_pic > 1))
27809 {
27810 fputs ("\t.long (", asm_out_file);
27811 output_addr_const (asm_out_file, symbol);
27812 fputs (")@fixup\n", asm_out_file);
27813 }
27814 else
27815 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
27816 }
27817
27818 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
27819 static void
27820 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
27821 {
27822 const char *section = ".dtors";
27823 char buf[18];
27824
27825 if (priority != DEFAULT_INIT_PRIORITY)
27826 {
27827 sprintf (buf, ".dtors.%.5u",
27828 /* Invert the numbering so the linker puts us in the proper
27829 order; destructors are run from right to left, and the
27830 linker sorts in increasing order. */
27831 MAX_INIT_PRIORITY - priority);
27832 section = buf;
27833 }
27834
27835 switch_to_section (get_section (section, SECTION_WRITE, NULL));
27836 assemble_align (POINTER_SIZE);
27837
27838 if (DEFAULT_ABI == ABI_V4
27839 && (TARGET_RELOCATABLE || flag_pic > 1))
27840 {
27841 fputs ("\t.long (", asm_out_file);
27842 output_addr_const (asm_out_file, symbol);
27843 fputs (")@fixup\n", asm_out_file);
27844 }
27845 else
27846 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
27847 }
27848
27849 void
27850 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
27851 {
27852 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
27853 {
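/* 64-bit ELFv1: emit the function descriptor in ".opd" -- the code
   entry address, the TOC base, and a zero environment word -- so that
   NAME labels the descriptor rather than the code itself.  */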
27854 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
27855 ASM_OUTPUT_LABEL (file, name);
27856 fputs (DOUBLE_INT_ASM_OP, file);
27857 rs6000_output_function_entry (file, name);
27858 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
27859 if (DOT_SYMBOLS)
27860 {
27861 fputs ("\t.size\t", file);
27862 assemble_name (file, name);
27863 fputs (",24\n\t.type\t.", file);
27864 assemble_name (file, name);
27865 fputs (",@function\n", file);
27866 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
27867 {
27868 fputs ("\t.globl\t.", file);
27869 assemble_name (file, name);
27870 putc ('\n', file);
27871 }
27872 }
27873 else
27874 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
27875 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
27876 rs6000_output_function_entry (file, name);
27877 fputs (":\n", file);
27878 return;
27879 }
27880
27881 int uses_toc;
27882 if (DEFAULT_ABI == ABI_V4
27883 && (TARGET_RELOCATABLE || flag_pic > 1)
27884 && !TARGET_SECURE_PLT
27885 && (!constant_pool_empty_p () || crtl->profile)
27886 && (uses_toc = uses_TOC ()))
27887 {
27888 char buf[256];
27889
27890 if (uses_toc == 2)
27891 switch_to_other_text_partition ();
27892 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
27893
27894 fprintf (file, "\t.long ");
27895 assemble_name (file, toc_label_name);
27896 need_toc_init = 1;
27897 putc ('-', file);
27898 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27899 assemble_name (file, buf);
27900 putc ('\n', file);
27901 if (uses_toc == 2)
27902 switch_to_other_text_partition ();
27903 }
27904
27905 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
27906 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
27907
27908 if (TARGET_CMODEL == CMODEL_LARGE
27909 && rs6000_global_entry_point_prologue_needed_p ())
27910 {
27911 char buf[256];
27912
27913 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
27914
27915 fprintf (file, "\t.quad .TOC.-");
27916 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27917 assemble_name (file, buf);
27918 putc ('\n', file);
27919 }
27920
27921 if (DEFAULT_ABI == ABI_AIX)
27922 {
27923 const char *desc_name, *orig_name;
27924
27925 orig_name = (*targetm.strip_name_encoding) (name);
27926 desc_name = orig_name;
27927 while (*desc_name == '.')
27928 desc_name++;
27929
27930 if (TREE_PUBLIC (decl))
27931 fprintf (file, "\t.globl %s\n", desc_name);
27932
27933 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27934 fprintf (file, "%s:\n", desc_name);
27935 fprintf (file, "\t.long %s\n", orig_name);
27936 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
27937 fputs ("\t.long 0\n", file);
27938 fprintf (file, "\t.previous\n");
27939 }
27940 ASM_OUTPUT_LABEL (file, name);
27941 }
27942
27943 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
27944 static void
27945 rs6000_elf_file_end (void)
27946 {
27947 #ifdef HAVE_AS_GNU_ATTRIBUTE
27948 /* ??? The value emitted depends on options active at file end.
27949 Assume anyone using #pragma or attributes that might change
27950 options knows what they are doing. */
27951 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
27952 && rs6000_passes_float)
27953 {
27954 int fp;
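/* The value encodes the FP ABI following the Power GNU attribute
   convention (.gnu_attribute 4): bits 0-1 describe scalar floats
   (1 = hard double, 2 = soft float) and bits 2-3 describe long double
   (1 = 128-bit IBM extended, 2 = 64-bit, 3 = IEEE 128-bit), which is
   what the "* 4" factors below select.  */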
27955
27956 if (TARGET_HARD_FLOAT)
27957 fp = 1;
27958 else
27959 fp = 2;
27960 if (rs6000_passes_long_double)
27961 {
27962 if (!TARGET_LONG_DOUBLE_128)
27963 fp |= 2 * 4;
27964 else if (TARGET_IEEEQUAD)
27965 fp |= 3 * 4;
27966 else
27967 fp |= 1 * 4;
27968 }
27969 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
27970 }
27971 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
27972 {
27973 if (rs6000_passes_vector)
27974 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
27975 (TARGET_ALTIVEC_ABI ? 2 : 1));
27976 if (rs6000_returns_struct)
27977 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
27978 aix_struct_return ? 2 : 1);
27979 }
27980 #endif
27981 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
27982 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
27983 file_end_indicate_exec_stack ();
27984 #endif
27985
27986 if (flag_split_stack)
27987 file_end_indicate_split_stack ();
27988
27989 if (cpu_builtin_p)
27990 {
27991 /* We have expanded a CPU builtin, so we need to emit a reference to
27992 the special symbol that LIBC uses to declare that it supports the
27993 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
27994 switch_to_section (data_section);
27995 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
27996 fprintf (asm_out_file, "\t%s %s\n",
27997 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
27998 }
27999 }
28000 #endif
28001
28002 #if TARGET_XCOFF
28003
28004 #ifndef HAVE_XCOFF_DWARF_EXTRAS
28005 #define HAVE_XCOFF_DWARF_EXTRAS 0
28006 #endif
28007
28008 static enum unwind_info_type
28009 rs6000_xcoff_debug_unwind_info (void)
28010 {
28011 return UI_NONE;
28012 }
28013
28014 static void
28015 rs6000_xcoff_asm_output_anchor (rtx symbol)
28016 {
28017 char buffer[100];
28018
28019 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
28020 SYMBOL_REF_BLOCK_OFFSET (symbol));
28021 fprintf (asm_out_file, "%s", SET_ASM_OP);
28022 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
28023 fprintf (asm_out_file, ",");
28024 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
28025 fprintf (asm_out_file, "\n");
28026 }
28027
28028 static void
28029 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
28030 {
28031 fputs (GLOBAL_ASM_OP, stream);
28032 RS6000_OUTPUT_BASENAME (stream, name);
28033 putc ('\n', stream);
28034 }
28035
28036 /* A get_unnamed_section callback, used for read-only sections. DIRECTIVE
28037 points to the section string variable. */
28038
28039 static void
28040 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
28041 {
28042 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
28043 *(const char *const *) directive,
28044 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
28045 }
28046
28047 /* Likewise for read-write sections. */
28048
28049 static void
28050 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
28051 {
28052 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
28053 *(const char *const *) directive,
28054 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
28055 }
28056
28057 static void
28058 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
28059 {
28060 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
28061 *(const char *const *) directive,
28062 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
28063 }
28064
28065 /* A get_unnamed_section callback, used for switching to toc_section. */
28066
28067 static void
28068 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
28069 {
28070 if (TARGET_MINIMAL_TOC)
28071 {
28072 /* toc_section is always selected at least once from
28073 rs6000_xcoff_file_start, so this is guaranteed to
28074 always be defined once and only once in each file. */
28075 if (!toc_initialized)
28076 {
28077 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
28078 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
28079 toc_initialized = 1;
28080 }
28081 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
28082 (TARGET_32BIT ? "" : ",3"));
28083 }
28084 else
28085 fputs ("\t.toc\n", asm_out_file);
28086 }
28087
28088 /* Implement TARGET_ASM_INIT_SECTIONS. */
28089
28090 static void
28091 rs6000_xcoff_asm_init_sections (void)
28092 {
28093 read_only_data_section
28094 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
28095 &xcoff_read_only_section_name);
28096
28097 private_data_section
28098 = get_unnamed_section (SECTION_WRITE,
28099 rs6000_xcoff_output_readwrite_section_asm_op,
28100 &xcoff_private_data_section_name);
28101
28102 read_only_private_data_section
28103 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
28104 &xcoff_private_rodata_section_name);
28105
28106 tls_data_section
28107 = get_unnamed_section (SECTION_TLS,
28108 rs6000_xcoff_output_tls_section_asm_op,
28109 &xcoff_tls_data_section_name);
28110
28111 tls_private_data_section
28112 = get_unnamed_section (SECTION_TLS,
28113 rs6000_xcoff_output_tls_section_asm_op,
28114 &xcoff_private_data_section_name);
28115
28116 toc_section
28117 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
28118
28119 readonly_data_section = read_only_data_section;
28120 }
28121
28122 static int
28123 rs6000_xcoff_reloc_rw_mask (void)
28124 {
28125 return 3;
28126 }
28127
28128 static void
28129 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
28130 tree decl ATTRIBUTE_UNUSED)
28131 {
28132 int smclass;
28133 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
28134
28135 if (flags & SECTION_EXCLUDE)
28136 smclass = 4;
28137 else if (flags & SECTION_DEBUG)
28138 {
28139 fprintf (asm_out_file, "\t.dwsect %s\n", name);
28140 return;
28141 }
28142 else if (flags & SECTION_CODE)
28143 smclass = 0;
28144 else if (flags & SECTION_TLS)
28145 smclass = 3;
28146 else if (flags & SECTION_WRITE)
28147 smclass = 2;
28148 else
28149 smclass = 1;
28150
28151 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
28152 (flags & SECTION_CODE) ? "." : "",
28153 name, suffix[smclass], flags & SECTION_ENTSIZE);
28154 }
28155
28156 #define IN_NAMED_SECTION(DECL) \
28157 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
28158 && DECL_SECTION_NAME (DECL) != NULL)
28159
28160 static section *
28161 rs6000_xcoff_select_section (tree decl, int reloc,
28162 unsigned HOST_WIDE_INT align)
28163 {
28164 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
28165 a named section. */
28166 if (align > BIGGEST_ALIGNMENT)
28167 {
28168 resolve_unique_section (decl, reloc, true);
28169 if (IN_NAMED_SECTION (decl))
28170 return get_named_section (decl, NULL, reloc);
28171 }
28172
28173 if (decl_readonly_section (decl, reloc))
28174 {
28175 if (TREE_PUBLIC (decl))
28176 return read_only_data_section;
28177 else
28178 return read_only_private_data_section;
28179 }
28180 else
28181 {
28182 #if HAVE_AS_TLS
28183 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
28184 {
28185 if (TREE_PUBLIC (decl))
28186 return tls_data_section;
28187 else if (bss_initializer_p (decl))
28188 {
28189 /* Convert to COMMON to emit in BSS. */
28190 DECL_COMMON (decl) = 1;
28191 return tls_comm_section;
28192 }
28193 else
28194 return tls_private_data_section;
28195 }
28196 else
28197 #endif
28198 if (TREE_PUBLIC (decl))
28199 return data_section;
28200 else
28201 return private_data_section;
28202 }
28203 }
28204
28205 static void
28206 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
28207 {
28208 const char *name;
28209
28210 /* Use select_section for private data and uninitialized data with
28211 alignment <= BIGGEST_ALIGNMENT. */
28212 if (!TREE_PUBLIC (decl)
28213 || DECL_COMMON (decl)
28214 || (DECL_INITIAL (decl) == NULL_TREE
28215 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
28216 || DECL_INITIAL (decl) == error_mark_node
28217 || (flag_zero_initialized_in_bss
28218 && initializer_zerop (DECL_INITIAL (decl))))
28219 return;
28220
28221 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
28222 name = (*targetm.strip_name_encoding) (name);
28223 set_decl_section_name (decl, name);
28224 }
28225
28226 /* Select section for constant in constant pool.
28227
28228 On RS/6000, all constants are in the private read-only data area.
28229 However, if this is being placed in the TOC it must be output as a
28230 toc entry. */
28231
28232 static section *
28233 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
28234 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
28235 {
28236 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
28237 return toc_section;
28238 else
28239 return read_only_private_data_section;
28240 }
28241
28242 /* Remove any trailing [DS] or the like from the symbol name. */
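/* For example, "foo[DS]" becomes "foo".  Only the four-character
   "[XX]" form is handled, hence the fixed "len - 4" below.  */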
28243
28244 static const char *
28245 rs6000_xcoff_strip_name_encoding (const char *name)
28246 {
28247 size_t len;
28248 if (*name == '*')
28249 name++;
28250 len = strlen (name);
28251 if (name[len - 1] == ']')
28252 return ggc_alloc_string (name, len - 4);
28253 else
28254 return name;
28255 }
28256
28257 /* Section attributes. AIX is always PIC. */
28258
28259 static unsigned int
28260 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
28261 {
28262 unsigned int align;
28263 unsigned int flags = default_section_type_flags (decl, name, reloc);
28264
28265 /* Align to at least UNIT size. */
28266 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
28267 align = MIN_UNITS_PER_WORD;
28268 else
28269 /* Increase alignment of large objects if not already stricter. */
28270 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
28271 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
28272 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
28273
28274 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
28275 }
28276
28277 /* Output at beginning of assembler file.
28278
28279 Initialize the section names for the RS/6000 at this point.
28280
28281 Specify filename, including full path, to assembler.
28282
28283 We want to go into the TOC section so at least one .toc will be emitted.
28284 Also, in order to output proper .bs/.es pairs, we need at least one static
28285 [RW] section emitted.
28286
28287 Finally, declare mcount when profiling to make the assembler happy. */
28288
28289 static void
28290 rs6000_xcoff_file_start (void)
28291 {
28292 rs6000_gen_section_name (&xcoff_bss_section_name,
28293 main_input_filename, ".bss_");
28294 rs6000_gen_section_name (&xcoff_private_data_section_name,
28295 main_input_filename, ".rw_");
28296 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
28297 main_input_filename, ".rop_");
28298 rs6000_gen_section_name (&xcoff_read_only_section_name,
28299 main_input_filename, ".ro_");
28300 rs6000_gen_section_name (&xcoff_tls_data_section_name,
28301 main_input_filename, ".tls_");
28302 rs6000_gen_section_name (&xcoff_tbss_section_name,
28303 main_input_filename, ".tbss_[UL]");
28304
28305 fputs ("\t.file\t", asm_out_file);
28306 output_quoted_string (asm_out_file, main_input_filename);
28307 fputc ('\n', asm_out_file);
28308 if (write_symbols != NO_DEBUG)
28309 switch_to_section (private_data_section);
28310 switch_to_section (toc_section);
28311 switch_to_section (text_section);
28312 if (profile_flag)
28313 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
28314 rs6000_file_start ();
28315 }
28316
28317 /* Output at end of assembler file.
28318 On the RS/6000, referencing data should automatically pull in text. */
28319
28320 static void
28321 rs6000_xcoff_file_end (void)
28322 {
28323 switch_to_section (text_section);
28324 fputs ("_section_.text:\n", asm_out_file);
28325 switch_to_section (data_section);
28326 fputs (TARGET_32BIT
28327 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
28328 asm_out_file);
28329 }
28330
28331 struct declare_alias_data
28332 {
28333 FILE *file;
28334 bool function_descriptor;
28335 };
28336
28337 /* Declare alias N.  A helper for symtab_node::call_for_symbol_and_aliases. */
28338
28339 static bool
28340 rs6000_declare_alias (struct symtab_node *n, void *d)
28341 {
28342 struct declare_alias_data *data = (struct declare_alias_data *)d;
28343 /* Main symbol is output specially, because varasm machinery does part of
28344 the job for us -- we do not need to declare .globl/.lglobl and such. */
28345 if (!n->alias || n->weakref)
28346 return false;
28347
28348 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
28349 return false;
28350
28351 /* Prevent assemble_alias from trying to use .set pseudo operation
28352 that does not behave as expected by the middle-end. */
28353 TREE_ASM_WRITTEN (n->decl) = true;
28354
28355 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
28356 char *buffer = (char *) alloca (strlen (name) + 2);
28357 char *p;
28358 int dollar_inside = 0;
28359
28360 strcpy (buffer, name);
28361 p = strchr (buffer, '$');
28362 while (p) {
28363 *p = '_';
28364 dollar_inside++;
28365 p = strchr (p + 1, '$');
28366 }
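/* BUFFER now holds NAME with any '$' replaced by '_'.  When a
   replacement occurred, the ".rename" directives emitted below map
   the mangled label back to the original '$' name for the
   assembler.  */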
28367 if (TREE_PUBLIC (n->decl))
28368 {
28369 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
28370 {
28371 if (dollar_inside) {
28372 if (data->function_descriptor)
28373 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
28374 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
28375 }
28376 if (data->function_descriptor)
28377 {
28378 fputs ("\t.globl .", data->file);
28379 RS6000_OUTPUT_BASENAME (data->file, buffer);
28380 putc ('\n', data->file);
28381 }
28382 fputs ("\t.globl ", data->file);
28383 RS6000_OUTPUT_BASENAME (data->file, buffer);
28384 putc ('\n', data->file);
28385 }
28386 #ifdef ASM_WEAKEN_DECL
28387 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
28388 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
28389 #endif
28390 }
28391 else
28392 {
28393 if (dollar_inside)
28394 {
28395 if (data->function_descriptor)
28396 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
28397 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
28398 }
28399 if (data->function_descriptor)
28400 {
28401 fputs ("\t.lglobl .", data->file);
28402 RS6000_OUTPUT_BASENAME (data->file, buffer);
28403 putc ('\n', data->file);
28404 }
28405 fputs ("\t.lglobl ", data->file);
28406 RS6000_OUTPUT_BASENAME (data->file, buffer);
28407 putc ('\n', data->file);
28408 }
28409 if (data->function_descriptor)
28410 fputs (".", data->file);
28411 RS6000_OUTPUT_BASENAME (data->file, buffer);
28412 fputs (":\n", data->file);
28413 return false;
28414 }
28415
28416
28417 #ifdef HAVE_GAS_HIDDEN
28418 /* Helper function to calculate visibility of a DECL
28419 and return the value as a const string. */
28420
28421 static const char *
28422 rs6000_xcoff_visibility (tree decl)
28423 {
28424 static const char * const visibility_types[] = {
28425 "", ",protected", ",hidden", ",internal"
28426 };
28427
28428 enum symbol_visibility vis = DECL_VISIBILITY (decl);
28429 return visibility_types[vis];
28430 }
28431 #endif
28432
28433
28434 /* This macro produces the initial definition of a function name.
28435 On the RS/6000, we need to place an extra '.' in the function name and
28436 output the function descriptor.
28437 Dollar signs are converted to underscores.
28438
28439 The csect for the function will have already been created when
28440 text_section was selected. We do have to go back to that csect, however.
28441
28442 The third and fourth parameters to the .function pseudo-op (16 and 044)
28443 are placeholders which no longer have any use.
28444
28445 Because the AIX assembler's .set command has unexpected semantics, we output
28446 all aliases as alternative labels in front of the definition. */
28447
28448 void
28449 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
28450 {
28451 char *buffer = (char *) alloca (strlen (name) + 1);
28452 char *p;
28453 int dollar_inside = 0;
28454 struct declare_alias_data data = {file, false};
28455
28456 strcpy (buffer, name);
28457 p = strchr (buffer, '$');
28458 while (p) {
28459 *p = '_';
28460 dollar_inside++;
28461 p = strchr (p + 1, '$');
28462 }
28463 if (TREE_PUBLIC (decl))
28464 {
28465 if (!RS6000_WEAK || !DECL_WEAK (decl))
28466 {
28467 if (dollar_inside) {
28468 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
28469 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
28470 }
28471 fputs ("\t.globl .", file);
28472 RS6000_OUTPUT_BASENAME (file, buffer);
28473 #ifdef HAVE_GAS_HIDDEN
28474 fputs (rs6000_xcoff_visibility (decl), file);
28475 #endif
28476 putc ('\n', file);
28477 }
28478 }
28479 else
28480 {
28481 if (dollar_inside) {
28482 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
28483 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
28484 }
28485 fputs ("\t.lglobl .", file);
28486 RS6000_OUTPUT_BASENAME (file, buffer);
28487 putc ('\n', file);
28488 }
28489 fputs ("\t.csect ", file);
28490 RS6000_OUTPUT_BASENAME (file, buffer);
28491 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
28492 RS6000_OUTPUT_BASENAME (file, buffer);
28493 fputs (":\n", file);
28494 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
28495 &data, true);
28496 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
28497 RS6000_OUTPUT_BASENAME (file, buffer);
28498 fputs (", TOC[tc0], 0\n", file);
28499 in_section = NULL;
28500 switch_to_section (function_section (decl));
28501 putc ('.', file);
28502 RS6000_OUTPUT_BASENAME (file, buffer);
28503 fputs (":\n", file);
28504 data.function_descriptor = true;
28505 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
28506 &data, true);
28507 if (!DECL_IGNORED_P (decl))
28508 {
28509 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
28510 xcoffout_declare_function (file, decl, buffer);
28511 else if (write_symbols == DWARF2_DEBUG)
28512 {
28513 name = (*targetm.strip_name_encoding) (name);
28514 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
28515 }
28516 }
28517 return;
28518 }
28519
28520
28521 /* Output assembly language to globalize a symbol from a DECL,
28522 possibly with visibility. */
28523
28524 void
28525 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
28526 {
28527 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
28528 fputs (GLOBAL_ASM_OP, stream);
28529 RS6000_OUTPUT_BASENAME (stream, name);
28530 #ifdef HAVE_GAS_HIDDEN
28531 fputs (rs6000_xcoff_visibility (decl), stream);
28532 #endif
28533 putc ('\n', stream);
28534 }
28535
28536 /* Output assembly language to define a symbol as COMMON from a DECL,
28537 possibly with visibility. */
28538
28539 void
28540 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
28541 tree decl ATTRIBUTE_UNUSED,
28542 const char *name,
28543 unsigned HOST_WIDE_INT size,
28544 unsigned HOST_WIDE_INT align)
28545 {
28546 unsigned HOST_WIDE_INT align2 = 2;
28547
28548 if (align > 32)
28549 align2 = floor_log2 (align / BITS_PER_UNIT);
28550 else if (size > 4)
28551 align2 = 3;
28552
28553 fputs (COMMON_ASM_OP, stream);
28554 RS6000_OUTPUT_BASENAME (stream, name);
28555
28556 fprintf (stream,
28557 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
28558 size, align2);
28559
28560 #ifdef HAVE_GAS_HIDDEN
28561 if (decl != NULL)
28562 fputs (rs6000_xcoff_visibility (decl), stream);
28563 #endif
28564 putc ('\n', stream);
28565 }
28566
28567 /* This macro produces the initial definition of a object (variable) name.
28568 Because AIX assembler's .set command has unexpected semantics, we output
28569 all aliases as alternative labels in front of the definition. */
28570
28571 void
28572 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
28573 {
28574 struct declare_alias_data data = {file, false};
28575 RS6000_OUTPUT_BASENAME (file, name);
28576 fputs (":\n", file);
28577 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
28578 &data, true);
28579 }
28580
28581 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
28582
28583 void
28584 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
28585 {
28586 fputs (integer_asm_op (size, FALSE), file);
28587 assemble_name (file, label);
28588 fputs ("-$", file);
28589 }
28590
28591 /* Output a symbol offset relative to the dbase for the current object.
28592 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
28593 signed offsets.
28594
28595 __gcc_unwind_dbase is embedded in all executables/libraries through
28596 libgcc/config/rs6000/crtdbase.S. */
28597
28598 void
28599 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
28600 {
28601 fputs (integer_asm_op (size, FALSE), file);
28602 assemble_name (file, label);
28603 fputs("-__gcc_unwind_dbase", file);
28604 }
28605
28606 #ifdef HAVE_AS_TLS
28607 static void
28608 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
28609 {
28610 rtx symbol;
28611 int flags;
28612 const char *symname;
28613
28614 default_encode_section_info (decl, rtl, first);
28615
28616 /* Careful not to prod global register variables. */
28617 if (!MEM_P (rtl))
28618 return;
28619 symbol = XEXP (rtl, 0);
28620 if (!SYMBOL_REF_P (symbol))
28621 return;
28622
28623 flags = SYMBOL_REF_FLAGS (symbol);
28624
28625 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
28626 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
28627
28628 SYMBOL_REF_FLAGS (symbol) = flags;
28629
28630 /* Append mapping class to extern decls. */
28631 symname = XSTR (symbol, 0);
28632 if (decl /* sync condition with assemble_external () */
28633 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
28634 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
28635 || TREE_CODE (decl) == FUNCTION_DECL)
28636 && symname[strlen (symname) - 1] != ']')
28637 {
28638 char *newname = (char *) alloca (strlen (symname) + 5);
28639 strcpy (newname, symname);
28640 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
28641 ? "[DS]" : "[UA]"));
28642 XSTR (symbol, 0) = ggc_strdup (newname);
28643 }
28644 }
28645 #endif /* HAVE_AS_TLS */
28646 #endif /* TARGET_XCOFF */
28647
28648 void
28649 rs6000_asm_weaken_decl (FILE *stream, tree decl,
28650 const char *name, const char *val)
28651 {
28652 fputs ("\t.weak\t", stream);
28653 RS6000_OUTPUT_BASENAME (stream, name);
28654 if (decl && TREE_CODE (decl) == FUNCTION_DECL
28655 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
28656 {
28657 if (TARGET_XCOFF)
28658 fputs ("[DS]", stream);
28659 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
28660 if (TARGET_XCOFF)
28661 fputs (rs6000_xcoff_visibility (decl), stream);
28662 #endif
28663 fputs ("\n\t.weak\t.", stream);
28664 RS6000_OUTPUT_BASENAME (stream, name);
28665 }
28666 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
28667 if (TARGET_XCOFF)
28668 fputs (rs6000_xcoff_visibility (decl), stream);
28669 #endif
28670 fputc ('\n', stream);
28671 if (val)
28672 {
28673 #ifdef ASM_OUTPUT_DEF
28674 ASM_OUTPUT_DEF (stream, name, val);
28675 #endif
28676 if (decl && TREE_CODE (decl) == FUNCTION_DECL
28677 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
28678 {
28679 fputs ("\t.set\t.", stream);
28680 RS6000_OUTPUT_BASENAME (stream, name);
28681 fputs (",.", stream);
28682 RS6000_OUTPUT_BASENAME (stream, val);
28683 fputc ('\n', stream);
28684 }
28685 }
28686 }
28687
28688
28689 /* Return true if INSN should not be copied. */
28690
28691 static bool
28692 rs6000_cannot_copy_insn_p (rtx_insn *insn)
28693 {
28694 return recog_memoized (insn) >= 0
28695 && get_attr_cannot_copy (insn);
28696 }
28697
28698 /* Compute a (partial) cost for rtx X. Return true if the complete
28699 cost has been computed, and false if subexpressions should be
28700 scanned. In either case, *TOTAL contains the cost result. */
28701
28702 static bool
28703 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
28704 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
28705 {
28706 int code = GET_CODE (x);
28707
28708 switch (code)
28709 {
28710 /* On the RS/6000, if it is valid in the insn, it is free. */
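/* For example, a CONST_INT of 100 under a PLUS satisfies constraint
   "I" (a signed 16-bit constant), so it folds into a single addi and
   adds no cost of its own.  */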
28711 case CONST_INT:
28712 if (((outer_code == SET
28713 || outer_code == PLUS
28714 || outer_code == MINUS)
28715 && (satisfies_constraint_I (x)
28716 || satisfies_constraint_L (x)))
28717 || (outer_code == AND
28718 && (satisfies_constraint_K (x)
28719 || (mode == SImode
28720 ? satisfies_constraint_L (x)
28721 : satisfies_constraint_J (x))))
28722 || ((outer_code == IOR || outer_code == XOR)
28723 && (satisfies_constraint_K (x)
28724 || (mode == SImode
28725 ? satisfies_constraint_L (x)
28726 : satisfies_constraint_J (x))))
28727 || outer_code == ASHIFT
28728 || outer_code == ASHIFTRT
28729 || outer_code == LSHIFTRT
28730 || outer_code == ROTATE
28731 || outer_code == ROTATERT
28732 || outer_code == ZERO_EXTRACT
28733 || (outer_code == MULT
28734 && satisfies_constraint_I (x))
28735 || ((outer_code == DIV || outer_code == UDIV
28736 || outer_code == MOD || outer_code == UMOD)
28737 && exact_log2 (INTVAL (x)) >= 0)
28738 || (outer_code == COMPARE
28739 && (satisfies_constraint_I (x)
28740 || satisfies_constraint_K (x)))
28741 || ((outer_code == EQ || outer_code == NE)
28742 && (satisfies_constraint_I (x)
28743 || satisfies_constraint_K (x)
28744 || (mode == SImode
28745 ? satisfies_constraint_L (x)
28746 : satisfies_constraint_J (x))))
28747 || (outer_code == GTU
28748 && satisfies_constraint_I (x))
28749 || (outer_code == LTU
28750 && satisfies_constraint_P (x)))
28751 {
28752 *total = 0;
28753 return true;
28754 }
28755 else if ((outer_code == PLUS
28756 && reg_or_add_cint_operand (x, VOIDmode))
28757 || (outer_code == MINUS
28758 && reg_or_sub_cint_operand (x, VOIDmode))
28759 || ((outer_code == SET
28760 || outer_code == IOR
28761 || outer_code == XOR)
28762 && (INTVAL (x)
28763 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
28764 {
28765 *total = COSTS_N_INSNS (1);
28766 return true;
28767 }
28768 /* FALLTHRU */
28769
28770 case CONST_DOUBLE:
28771 case CONST_WIDE_INT:
28772 case CONST:
28773 case HIGH:
28774 case SYMBOL_REF:
28775 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
28776 return true;
28777
28778 case MEM:
28779 /* When optimizing for size, MEM should be slightly more expensive
28780 than generating an address, e.g., (plus (reg) (const)).
28781 L1 cache latency is about two instructions. */
28782 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
28783 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
28784 *total += COSTS_N_INSNS (100);
28785 return true;
28786
28787 case LABEL_REF:
28788 *total = 0;
28789 return true;
28790
28791 case PLUS:
28792 case MINUS:
28793 if (FLOAT_MODE_P (mode))
28794 *total = rs6000_cost->fp;
28795 else
28796 *total = COSTS_N_INSNS (1);
28797 return false;
28798
28799 case MULT:
28800 if (CONST_INT_P (XEXP (x, 1))
28801 && satisfies_constraint_I (XEXP (x, 1)))
28802 {
28803 if (INTVAL (XEXP (x, 1)) >= -256
28804 && INTVAL (XEXP (x, 1)) <= 255)
28805 *total = rs6000_cost->mulsi_const9;
28806 else
28807 *total = rs6000_cost->mulsi_const;
28808 }
28809 else if (mode == SFmode)
28810 *total = rs6000_cost->fp;
28811 else if (FLOAT_MODE_P (mode))
28812 *total = rs6000_cost->dmul;
28813 else if (mode == DImode)
28814 *total = rs6000_cost->muldi;
28815 else
28816 *total = rs6000_cost->mulsi;
28817 return false;
28818
28819 case FMA:
28820 if (mode == SFmode)
28821 *total = rs6000_cost->fp;
28822 else
28823 *total = rs6000_cost->dmul;
28824 break;
28825
28826 case DIV:
28827 case MOD:
28828 if (FLOAT_MODE_P (mode))
28829 {
28830 *total = mode == DFmode ? rs6000_cost->ddiv
28831 : rs6000_cost->sdiv;
28832 return false;
28833 }
28834 /* FALLTHRU */
28835
28836 case UDIV:
28837 case UMOD:
28838 if (CONST_INT_P (XEXP (x, 1))
28839 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
28840 {
28841 if (code == DIV || code == MOD)
28842 /* Shift, addze */
28843 *total = COSTS_N_INSNS (2);
28844 else
28845 /* Shift */
28846 *total = COSTS_N_INSNS (1);
28847 }
28848 else
28849 {
28850 if (GET_MODE (XEXP (x, 1)) == DImode)
28851 *total = rs6000_cost->divdi;
28852 else
28853 *total = rs6000_cost->divsi;
28854 }
28855 /* Add in shift and subtract for MOD unless we have a mod instruction. */
28856 if (!TARGET_MODULO && (code == MOD || code == UMOD))
28857 *total += COSTS_N_INSNS (2);
28858 return false;
28859
28860 case CTZ:
28861 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
28862 return false;
28863
28864 case FFS:
28865 *total = COSTS_N_INSNS (4);
28866 return false;
28867
28868 case POPCOUNT:
28869 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
28870 return false;
28871
28872 case PARITY:
28873 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
28874 return false;
28875
28876 case NOT:
28877 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
28878 *total = 0;
28879 else
28880 *total = COSTS_N_INSNS (1);
28881 return false;
28882
28883 case AND:
28884 if (CONST_INT_P (XEXP (x, 1)))
28885 {
28886 rtx left = XEXP (x, 0);
28887 rtx_code left_code = GET_CODE (left);
28888
28889 /* rotate-and-mask: 1 insn. */
28890 if ((left_code == ROTATE
28891 || left_code == ASHIFT
28892 || left_code == LSHIFTRT)
28893 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
28894 {
28895 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
28896 if (!CONST_INT_P (XEXP (left, 1)))
28897 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
28898 *total += COSTS_N_INSNS (1);
28899 return true;
28900 }
28901
28902 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
28903 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
28904 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
28905 || (val & 0xffff) == val
28906 || (val & 0xffff0000) == val
28907 || ((val & 0xffff) == 0 && mode == SImode))
28908 {
28909 *total = rtx_cost (left, mode, AND, 0, speed);
28910 *total += COSTS_N_INSNS (1);
28911 return true;
28912 }
28913
28914 /* 2 insns. */
28915 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
28916 {
28917 *total = rtx_cost (left, mode, AND, 0, speed);
28918 *total += COSTS_N_INSNS (2);
28919 return true;
28920 }
28921 }
28922
28923 *total = COSTS_N_INSNS (1);
28924 return false;
28925
28926 case IOR:
28927 /* FIXME */
28928 *total = COSTS_N_INSNS (1);
28929 return true;
28930
28931 case CLZ:
28932 case XOR:
28933 case ZERO_EXTRACT:
28934 *total = COSTS_N_INSNS (1);
28935 return false;
28936
28937 case ASHIFT:
28938 /* The EXTSWSLI instruction is a combined instruction. Don't count both
28939 the sign extend and shift separately within the insn. */
28940 if (TARGET_EXTSWSLI && mode == DImode
28941 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
28942 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
28943 {
28944 *total = 0;
28945 return false;
28946 }
28947 /* fall through */
28948
28949 case ASHIFTRT:
28950 case LSHIFTRT:
28951 case ROTATE:
28952 case ROTATERT:
28953 /* Handle mul_highpart. */
28954 if (outer_code == TRUNCATE
28955 && GET_CODE (XEXP (x, 0)) == MULT)
28956 {
28957 if (mode == DImode)
28958 *total = rs6000_cost->muldi;
28959 else
28960 *total = rs6000_cost->mulsi;
28961 return true;
28962 }
28963 else if (outer_code == AND)
28964 *total = 0;
28965 else
28966 *total = COSTS_N_INSNS (1);
28967 return false;
28968
28969 case SIGN_EXTEND:
28970 case ZERO_EXTEND:
28971 if (MEM_P (XEXP (x, 0)))
28972 *total = 0;
28973 else
28974 *total = COSTS_N_INSNS (1);
28975 return false;
28976
28977 case COMPARE:
28978 case NEG:
28979 case ABS:
28980 if (!FLOAT_MODE_P (mode))
28981 {
28982 *total = COSTS_N_INSNS (1);
28983 return false;
28984 }
28985 /* FALLTHRU */
28986
28987 case FLOAT:
28988 case UNSIGNED_FLOAT:
28989 case FIX:
28990 case UNSIGNED_FIX:
28991 case FLOAT_TRUNCATE:
28992 *total = rs6000_cost->fp;
28993 return false;
28994
28995 case FLOAT_EXTEND:
28996 if (mode == DFmode)
28997 *total = rs6000_cost->sfdf_convert;
28998 else
28999 *total = rs6000_cost->fp;
29000 return false;
29001
29002 case UNSPEC:
29003 switch (XINT (x, 1))
29004 {
29005 case UNSPEC_FRSP:
29006 *total = rs6000_cost->fp;
29007 return true;
29008
29009 default:
29010 break;
29011 }
29012 break;
29013
29014 case CALL:
29015 case IF_THEN_ELSE:
29016 if (!speed)
29017 {
29018 *total = COSTS_N_INSNS (1);
29019 return true;
29020 }
29021 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
29022 {
29023 *total = rs6000_cost->fp;
29024 return false;
29025 }
29026 break;
29027
29028 case NE:
29029 case EQ:
29030 case GTU:
29031 case LTU:
29032 /* Carry bit requires mode == Pmode.
29033 NEG or PLUS already counted so only add one. */
29034 if (mode == Pmode
29035 && (outer_code == NEG || outer_code == PLUS))
29036 {
29037 *total = COSTS_N_INSNS (1);
29038 return true;
29039 }
29040 /* FALLTHRU */
29041
29042 case GT:
29043 case LT:
29044 case UNORDERED:
29045 if (outer_code == SET)
29046 {
29047 if (XEXP (x, 1) == const0_rtx)
29048 {
29049 *total = COSTS_N_INSNS (2);
29050 return true;
29051 }
29052 else
29053 {
29054 *total = COSTS_N_INSNS (3);
29055 return false;
29056 }
29057 }
29058 /* CC COMPARE. */
29059 if (outer_code == COMPARE)
29060 {
29061 *total = 0;
29062 return true;
29063 }
29064 break;
29065
29066 default:
29067 break;
29068 }
29069
29070 return false;
29071 }
29072
29073 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
29074
29075 static bool
29076 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
29077 int opno, int *total, bool speed)
29078 {
29079 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
29080
29081 fprintf (stderr,
29082 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
29083 "opno = %d, total = %d, speed = %s, x:\n",
29084 ret ? "complete" : "scan inner",
29085 GET_MODE_NAME (mode),
29086 GET_RTX_NAME (outer_code),
29087 opno,
29088 *total,
29089 speed ? "true" : "false");
29090
29091 debug_rtx (x);
29092
29093 return ret;
29094 }
29095
29096 static int
29097 rs6000_insn_cost (rtx_insn *insn, bool speed)
29098 {
29099 if (recog_memoized (insn) < 0)
29100 return 0;
29101
29102 if (!speed)
29103 return get_attr_length (insn);
29104
29105 int cost = get_attr_cost (insn);
29106 if (cost > 0)
29107 return cost;
29108
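/* Every PowerPC instruction is 4 bytes, so length / 4 estimates how
   many machine instructions the pattern expands to.  */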
29109 int n = get_attr_length (insn) / 4;
29110 enum attr_type type = get_attr_type (insn);
29111
29112 switch (type)
29113 {
29114 case TYPE_LOAD:
29115 case TYPE_FPLOAD:
29116 case TYPE_VECLOAD:
29117 cost = COSTS_N_INSNS (n + 1);
29118 break;
29119
29120 case TYPE_MUL:
29121 switch (get_attr_size (insn))
29122 {
29123 case SIZE_8:
29124 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
29125 break;
29126 case SIZE_16:
29127 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
29128 break;
29129 case SIZE_32:
29130 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
29131 break;
29132 case SIZE_64:
29133 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
29134 break;
29135 default:
29136 gcc_unreachable ();
29137 }
29138 break;
29139 case TYPE_DIV:
29140 switch (get_attr_size (insn))
29141 {
29142 case SIZE_32:
29143 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
29144 break;
29145 case SIZE_64:
29146 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
29147 break;
29148 default:
29149 gcc_unreachable ();
29150 }
29151 break;
29152
29153 case TYPE_FP:
29154 cost = n * rs6000_cost->fp;
29155 break;
29156 case TYPE_DMUL:
29157 cost = n * rs6000_cost->dmul;
29158 break;
29159 case TYPE_SDIV:
29160 cost = n * rs6000_cost->sdiv;
29161 break;
29162 case TYPE_DDIV:
29163 cost = n * rs6000_cost->ddiv;
29164 break;
29165
29166 case TYPE_SYNC:
29167 case TYPE_LOAD_L:
29168 case TYPE_MFCR:
29169 case TYPE_MFCRF:
29170 cost = COSTS_N_INSNS (n + 2);
29171 break;
29172
29173 default:
29174 cost = COSTS_N_INSNS (n);
29175 }
29176
29177 return cost;
29178 }
29179
29180 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
29181
29182 static int
29183 rs6000_debug_address_cost (rtx x, machine_mode mode,
29184 addr_space_t as, bool speed)
29185 {
29186 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
29187
29188 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
29189 ret, speed ? "true" : "false");
29190 debug_rtx (x);
29191
29192 return ret;
29193 }
29194
29195
29196 /* A C expression returning the cost of moving data from a register of class
29197 FROM to one of class TO. */
29198
29199 static int
29200 rs6000_register_move_cost (machine_mode mode,
29201 reg_class_t from, reg_class_t to)
29202 {
29203 int ret;
29204 reg_class_t rclass;
29205
29206 if (TARGET_DEBUG_COST)
29207 dbg_cost_ctrl++;
29208
29209 /* If we have VSX, we can easily move between FPR or Altivec registers,
29210 otherwise we can only easily move within classes.
29211 Do this first so we give best-case answers for union classes
29212 containing both gprs and vsx regs. */
29213 HARD_REG_SET to_vsx, from_vsx;
29214 COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]);
29215 AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]);
29216 COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]);
29217 AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]);
29218 if (!hard_reg_set_empty_p (to_vsx)
29219 && !hard_reg_set_empty_p (from_vsx)
29220 && (TARGET_VSX
29221 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
29222 {
29223 int reg = FIRST_FPR_REGNO;
29224 if (TARGET_VSX
29225 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
29226 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
29227 reg = FIRST_ALTIVEC_REGNO;
29228 ret = 2 * hard_regno_nregs (reg, mode);
29229 }
29230
29231 /* Moves from/to GENERAL_REGS. */
29232 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
29233 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
29234 {
29235 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
29236 {
29237 if (TARGET_DIRECT_MOVE)
29238 {
29239 /* Keep the cost for direct moves above that for within
29240 a register class even if the actual processor cost is
29241 comparable. We do this because a direct move insn
29242 can't be a nop, whereas with ideal register
29243 allocation a move within the same class might turn
29244 out to be a nop. */
29245 if (rs6000_tune == PROCESSOR_POWER9
29246 || rs6000_tune == PROCESSOR_FUTURE)
29247 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29248 else
29249 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29250 /* SFmode requires a conversion when moving between gprs
29251 and vsx. */
29252 if (mode == SFmode)
29253 ret += 2;
29254 }
29255 else
29256 ret = (rs6000_memory_move_cost (mode, rclass, false)
29257 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
29258 }
29259
29260 /* It's more expensive to move CR_REGS than CR0_REGS because of the
29261 shift. */
29262 else if (rclass == CR_REGS)
29263 ret = 4;
29264
29265 /* For those processors that have slow LR/CTR moves, make them more
29266 expensive than memory in order to bias spills to memory. */
29267 else if ((rs6000_tune == PROCESSOR_POWER6
29268 || rs6000_tune == PROCESSOR_POWER7
29269 || rs6000_tune == PROCESSOR_POWER8
29270 || rs6000_tune == PROCESSOR_POWER9)
29271 && reg_class_subset_p (rclass, SPECIAL_REGS))
29272 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29273
29274 else
29275 /* A move will cost one instruction per GPR moved. */
29276 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29277 }
29278
29279 /* Everything else has to go through GENERAL_REGS. */
29280 else
29281 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
29282 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
29283
29284 if (TARGET_DEBUG_COST)
29285 {
29286 if (dbg_cost_ctrl == 1)
29287 fprintf (stderr,
29288 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
29289 ret, GET_MODE_NAME (mode), reg_class_names[from],
29290 reg_class_names[to]);
29291 dbg_cost_ctrl--;
29292 }
29293
29294 return ret;
29295 }
29296
29297 /* A C expression returning the cost of moving data of MODE from a register to
29298 or from memory. */
29299
29300 static int
29301 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
29302 bool in ATTRIBUTE_UNUSED)
29303 {
29304 int ret;
29305
29306 if (TARGET_DEBUG_COST)
29307 dbg_cost_ctrl++;
29308
29309 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
29310 ret = 4 * hard_regno_nregs (0, mode);
29311 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
29312 || reg_classes_intersect_p (rclass, VSX_REGS)))
29313 ret = 4 * hard_regno_nregs (32, mode);
29314 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
29315 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
29316 else
29317 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
29318
29319 if (TARGET_DEBUG_COST)
29320 {
29321 if (dbg_cost_ctrl == 1)
29322 fprintf (stderr,
29323 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
29324 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
29325 dbg_cost_ctrl--;
29326 }
29327
29328 return ret;
29329 }
29330
29331 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
29332
29333 The register allocator chooses GEN_OR_VSX_REGS for the allocno
29334 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
29335 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
29336 move cost between GENERAL_REGS and VSX_REGS low.
29337
29338 It might seem reasonable to use a union class. After all, if usage
29339 of vsr is low and gpr high, it might make sense to spill gpr to vsr
29340 rather than memory. However, in cases where register pressure of
29341 both is high, like the cactus_adm spec test, allowing
29342 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
29343 the first scheduling pass. This is partly due to an allocno of
29344 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
29345 class, which gives too high a pressure for GENERAL_REGS and too low
29346 for VSX_REGS. So, force a choice of the subclass here.
29347
29348 The best class is also the union if GENERAL_REGS and VSX_REGS have
29349 the same cost. In that case we do use GEN_OR_VSX_REGS as the
29350 allocno class, since trying to narrow down the class by regno mode
29351 is prone to error. For example, SImode is allowed in VSX regs and
29352 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
29353 it would be wrong to choose an allocno of GENERAL_REGS based on
29354 SImode. */
29355
29356 static reg_class_t
29357 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
29358 reg_class_t allocno_class,
29359 reg_class_t best_class)
29360 {
29361 switch (allocno_class)
29362 {
29363 case GEN_OR_VSX_REGS:
29364 /* best_class must be a subset of allocno_class. */
29365 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
29366 || best_class == GEN_OR_FLOAT_REGS
29367 || best_class == VSX_REGS
29368 || best_class == ALTIVEC_REGS
29369 || best_class == FLOAT_REGS
29370 || best_class == GENERAL_REGS
29371 || best_class == BASE_REGS);
29372 /* Use best_class but choose wider classes when copying from the
29373 wider class to best_class is cheap. This mimics IRA choice
29374 of allocno class. */
29375 if (best_class == BASE_REGS)
29376 return GENERAL_REGS;
29377 if (TARGET_VSX
29378 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
29379 return VSX_REGS;
29380 return best_class;
29381
29382 default:
29383 break;
29384 }
29385
29386 return allocno_class;
29387 }
29388
29389 /* Return the decl of a target-specific builtin that implements the
29390 reciprocal of the function FNDECL, or NULL_TREE if not available. */
29391
29392 static tree
29393 rs6000_builtin_reciprocal (tree fndecl)
29394 {
29395 switch (DECL_FUNCTION_CODE (fndecl))
29396 {
29397 case VSX_BUILTIN_XVSQRTDP:
29398 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
29399 return NULL_TREE;
29400
29401 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
29402
29403 case VSX_BUILTIN_XVSQRTSP:
29404 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
29405 return NULL_TREE;
29406
29407 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
29408
29409 default:
29410 return NULL_TREE;
29411 }
29412 }
29413
29414 /* Load up a constant. If the mode is a vector mode, splat the value across
29415 all of the vector elements. */
29416
29417 static rtx
29418 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
29419 {
29420 rtx reg;
29421
29422 if (mode == SFmode || mode == DFmode)
29423 {
29424 rtx d = const_double_from_real_value (dconst, mode);
29425 reg = force_reg (mode, d);
29426 }
29427 else if (mode == V4SFmode)
29428 {
29429 rtx d = const_double_from_real_value (dconst, SFmode);
29430 rtvec v = gen_rtvec (4, d, d, d, d);
29431 reg = gen_reg_rtx (mode);
29432 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
29433 }
29434 else if (mode == V2DFmode)
29435 {
29436 rtx d = const_double_from_real_value (dconst, DFmode);
29437 rtvec v = gen_rtvec (2, d, d);
29438 reg = gen_reg_rtx (mode);
29439 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
29440 }
29441 else
29442 gcc_unreachable ();
29443
29444 return reg;
29445 }
29446
29447 /* Generate an FMA instruction. */
29448
29449 static void
29450 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
29451 {
29452 machine_mode mode = GET_MODE (target);
29453 rtx dst;
29454
29455 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
29456 gcc_assert (dst != NULL);
29457
29458 if (dst != target)
29459 emit_move_insn (target, dst);
29460 }
29461
29462 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
29463
29464 static void
29465 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
29466 {
29467 machine_mode mode = GET_MODE (dst);
29468 rtx r;
29469
29470 /* This is a tad more complicated, since the fnma_optab is for
29471 a different expression: fma(-m1, m2, a), which is the same
29472 thing except in the case of signed zeros.
29473
29474 Fortunately we know that if FMA is supported that FNMSUB is
29475 also supported in the ISA. Just expand it directly. */
29476
29477 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
29478
29479 r = gen_rtx_NEG (mode, a);
29480 r = gen_rtx_FMA (mode, m1, m2, r);
29481 r = gen_rtx_NEG (mode, r);
29482 emit_insn (gen_rtx_SET (dst, r));
29483 }
29484
29485 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
29486 add a reg_note saying that this was a division. Support both scalar and
29487 vector divide. Assumes no trapping math and finite arguments. */
29488
29489 void
29490 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
29491 {
29492 machine_mode mode = GET_MODE (dst);
29493 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
29494 int i;
29495
29496 /* Low precision estimates guarantee 5 bits of accuracy. High
29497 precision estimates guarantee 14 bits of accuracy. SFmode
29498 requires 23 bits of accuracy. DFmode requires 52 bits of
29499 accuracy. Each pass at least doubles the accuracy, leading
29500 to the following. */
29501 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
29502 if (mode == DFmode || mode == V2DFmode)
29503 passes++;
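/* For example, a 14-bit estimate doubles to 28 bits in one pass,
   covering SFmode's 23 bits; one more doubling to 56 bits covers
   DFmode's 52.  */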
29504
29505 enum insn_code code = optab_handler (smul_optab, mode);
29506 insn_gen_fn gen_mul = GEN_FCN (code);
29507
29508 gcc_assert (code != CODE_FOR_nothing);
29509
29510 one = rs6000_load_constant_and_splat (mode, dconst1);
29511
29512 /* x0 = 1./d estimate */
29513 x0 = gen_reg_rtx (mode);
29514 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
29515 UNSPEC_FRES)));
29516
29517 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
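/* Equivalently, with e_i = 1 - d * x_i: x_(i+1) = x_i + e_i * x_i and
   e_(i+1) = e_i * e_i, which is what the nmsub/madd/mul pairs below
   compute; squaring the error term doubles the number of accurate
   bits each pass.  */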
29518 if (passes > 1) {
29519
29520 /* e0 = 1. - d * x0 */
29521 e0 = gen_reg_rtx (mode);
29522 rs6000_emit_nmsub (e0, d, x0, one);
29523
29524 /* x1 = x0 + e0 * x0 */
29525 x1 = gen_reg_rtx (mode);
29526 rs6000_emit_madd (x1, e0, x0, x0);
29527
29528 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
29529 ++i, xprev = xnext, eprev = enext) {
29530
29531 /* enext = eprev * eprev */
29532 enext = gen_reg_rtx (mode);
29533 emit_insn (gen_mul (enext, eprev, eprev));
29534
29535 /* xnext = xprev + enext * xprev */
29536 xnext = gen_reg_rtx (mode);
29537 rs6000_emit_madd (xnext, enext, xprev, xprev);
29538 }
29539
29540 } else
29541 xprev = x0;
29542
29543 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
29544
29545 /* u = n * xprev */
29546 u = gen_reg_rtx (mode);
29547 emit_insn (gen_mul (u, n, xprev));
29548
29549 /* v = n - (d * u) */
29550 v = gen_reg_rtx (mode);
29551 rs6000_emit_nmsub (v, d, u, n);
29552
29553 /* dst = (v * xprev) + u */
29554 rs6000_emit_madd (dst, v, xprev, u);
29555
29556 if (note_p)
29557 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
29558 }
29559
29560 /* Goldschmidt's Algorithm for single/double-precision floating point
29561 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
29562
29563 void
29564 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
29565 {
29566 machine_mode mode = GET_MODE (src);
29567 rtx e = gen_reg_rtx (mode);
29568 rtx g = gen_reg_rtx (mode);
29569 rtx h = gen_reg_rtx (mode);
29570
29571 /* Low precision estimates guarantee 5 bits of accuracy. High
29572 precision estimates guarantee 14 bits of accuracy. SFmode
29573 requires 23 bits of accuracy. DFmode requires 52 bits of
29574 accuracy. Each pass at least doubles the accuracy, leading
29575 to the following. */
29576 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
29577 if (mode == DFmode || mode == V2DFmode)
29578 passes++;
29579
29580 int i;
29581 rtx mhalf;
29582 enum insn_code code = optab_handler (smul_optab, mode);
29583 insn_gen_fn gen_mul = GEN_FCN (code);
29584
29585 gcc_assert (code != CODE_FOR_nothing);
29586
29587 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
29588
29589 /* e = rsqrt estimate */
29590 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
29591 UNSPEC_RSQRT)));
29592
29593 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
29594 if (!recip)
29595 {
29596 rtx zero = force_reg (mode, CONST0_RTX (mode));
29597
29598 if (mode == SFmode)
29599 {
29600 rtx target = emit_conditional_move (e, GT, src, zero, mode,
29601 e, zero, mode, 0);
29602 if (target != e)
29603 emit_move_insn (e, target);
29604 }
29605 else
29606 {
29607 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
29608 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
29609 }
29610 }
29611
29612 /* g = sqrt estimate. */
29613 emit_insn (gen_mul (g, e, src));
29614 /* h = 1/(2*sqrt) estimate. */
29615 emit_insn (gen_mul (h, e, mhalf));
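/* Goldschmidt invariants: g converges to sqrt(src) and h to
   1/(2*sqrt(src)).  Each pass computes the scaled error
   t = 1/2 - g * h, then refines g1 = g + g*t and h1 = h + h*t,
   converging quadratically.  */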
29616
29617 if (recip)
29618 {
29619 if (passes == 1)
29620 {
29621 rtx t = gen_reg_rtx (mode);
29622 rs6000_emit_nmsub (t, g, h, mhalf);
29623 /* Apply correction directly to 1/rsqrt estimate. */
29624 rs6000_emit_madd (dst, e, t, e);
29625 }
29626 else
29627 {
29628 for (i = 0; i < passes; i++)
29629 {
29630 rtx t1 = gen_reg_rtx (mode);
29631 rtx g1 = gen_reg_rtx (mode);
29632 rtx h1 = gen_reg_rtx (mode);
29633
29634 rs6000_emit_nmsub (t1, g, h, mhalf);
29635 rs6000_emit_madd (g1, g, t1, g);
29636 rs6000_emit_madd (h1, h, t1, h);
29637
29638 g = g1;
29639 h = h1;
29640 }
29641 /* Multiply by 2 for 1/rsqrt. */
29642 emit_insn (gen_add3_insn (dst, h, h));
29643 }
29644 }
29645 else
29646 {
29647 rtx t = gen_reg_rtx (mode);
29648 rs6000_emit_nmsub (t, g, h, mhalf);
29649 rs6000_emit_madd (dst, g, t, g);
29650 }
29651
29652 return;
29653 }
29654
29655 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
29656 (Power7) targets. DST is the target, and SRC is the argument operand. */
29657
29658 void
29659 rs6000_emit_popcount (rtx dst, rtx src)
29660 {
29661 machine_mode mode = GET_MODE (dst);
29662 rtx tmp1, tmp2;
29663
29664 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
29665 if (TARGET_POPCNTD)
29666 {
29667 if (mode == SImode)
29668 emit_insn (gen_popcntdsi2 (dst, src));
29669 else
29670 emit_insn (gen_popcntddi2 (dst, src));
29671 return;
29672 }
29673
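/* Fall back to popcntb, which leaves a per-byte population count in
   each byte of TMP1.  Multiplying by the repeated-0x01 constant sums
   all of those byte counts into the most significant byte, which the
   final right shift (24 or 56) extracts.  */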
29674 tmp1 = gen_reg_rtx (mode);
29675
29676 if (mode == SImode)
29677 {
29678 emit_insn (gen_popcntbsi2 (tmp1, src));
29679 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
29680 NULL_RTX, 0);
29681 tmp2 = force_reg (SImode, tmp2);
29682 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
29683 }
29684 else
29685 {
29686 emit_insn (gen_popcntbdi2 (tmp1, src));
29687 tmp2 = expand_mult (DImode, tmp1,
29688 GEN_INT ((HOST_WIDE_INT)
29689 0x01010101 << 32 | 0x01010101),
29690 NULL_RTX, 0);
29691 tmp2 = force_reg (DImode, tmp2);
29692 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
29693 }
29694 }
29695
29696
29697 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
29698 target, and SRC is the argument operand. */
29699
29700 void
29701 rs6000_emit_parity (rtx dst, rtx src)
29702 {
29703 machine_mode mode = GET_MODE (dst);
29704 rtx tmp;
29705
29706 tmp = gen_reg_rtx (mode);
29707
29708 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
29709 if (TARGET_CMPB)
29710 {
29711 if (mode == SImode)
29712 {
29713 emit_insn (gen_popcntbsi2 (tmp, src));
29714 emit_insn (gen_paritysi2_cmpb (dst, tmp));
29715 }
29716 else
29717 {
29718 emit_insn (gen_popcntbdi2 (tmp, src));
29719 emit_insn (gen_paritydi2_cmpb (dst, tmp));
29720 }
29721 return;
29722 }
29723
29724 if (mode == SImode)
29725 {
29726 /* Is mult+shift >= shift+xor+shift+xor? */
29727 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
29728 {
29729 rtx tmp1, tmp2, tmp3, tmp4;
29730
29731 tmp1 = gen_reg_rtx (SImode);
29732 emit_insn (gen_popcntbsi2 (tmp1, src));
29733
29734 tmp2 = gen_reg_rtx (SImode);
29735 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
29736 tmp3 = gen_reg_rtx (SImode);
29737 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
29738
29739 tmp4 = gen_reg_rtx (SImode);
29740 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
29741 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
29742 }
29743 else
29744 rs6000_emit_popcount (tmp, src);
29745 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
29746 }
29747 else
29748 {
29749 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
29750 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
29751 {
29752 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
29753
29754 tmp1 = gen_reg_rtx (DImode);
29755 emit_insn (gen_popcntbdi2 (tmp1, src));
29756
29757 tmp2 = gen_reg_rtx (DImode);
29758 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
29759 tmp3 = gen_reg_rtx (DImode);
29760 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
29761
29762 tmp4 = gen_reg_rtx (DImode);
29763 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
29764 tmp5 = gen_reg_rtx (DImode);
29765 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
29766
29767 tmp6 = gen_reg_rtx (DImode);
29768 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
29769 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
29770 }
29771 else
29772 rs6000_emit_popcount (tmp, src);
29773 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
29774 }
29775 }
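
/* For reference, the shift/xor sequences above rely on parity being
   the XOR of the parities of the halves.  After popcntb leaves a count
   in each byte, folding with

     x ^= x >> 16;  x ^= x >> 8;   (SImode; the DImode variant starts
                                    with x ^= x >> 32)

   accumulates bit 0 of every byte count (that byte's parity) into the
   low byte, and the final AND with 1 extracts the parity bit.  */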
29776
29777 /* Expand an Altivec constant permutation for little endian mode.
29778 OP0 and OP1 are the input vectors and TARGET is the output vector.
29779 SEL specifies the constant permutation vector.
29780
29781 There are two issues: First, the two input operands must be
29782 swapped so that together they form a double-wide array in LE
29783 order. Second, the vperm instruction has surprising behavior
29784 in LE mode: it interprets the elements of the source vectors
29785 in BE mode ("left to right") and interprets the elements of
29786 the destination vector in LE mode ("right to left"). To
29787 correct for this, we must subtract each element of the permute
29788 control vector from 31.
29789
29790 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
29791 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
29792 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
29793 serve as the permute control vector. Then, in BE mode,
29794
29795 vperm 9,10,11,12
29796
29797 places the desired result in vr9. However, in LE mode the
29798 vector contents will be
29799
29800 vr10 = 00000003 00000002 00000001 00000000
29801 vr11 = 00000007 00000006 00000005 00000004
29802
29803 The result of the vperm using the same permute control vector is
29804
29805 vr9 = 05000000 07000000 01000000 03000000
29806
29807 That is, the leftmost 4 bytes of vr10 are interpreted as the
29808 source for the rightmost 4 bytes of vr9, and so on.
29809
29810 If we change the permute control vector to
29811
29812 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
29813
29814 and issue
29815
29816 vperm 9,11,10,12
29817
29818 we get the desired
29819
29820 vr9 = 00000006 00000004 00000002 00000000. */
29821
29822 static void
29823 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
29824 const vec_perm_indices &sel)
29825 {
29826 unsigned int i;
29827 rtx perm[16];
29828 rtx constv, unspec;
29829
29830 /* Unpack and adjust the constant selector. */
29831 for (i = 0; i < 16; ++i)
29832 {
29833 unsigned int elt = 31 - (sel[i] & 31);
29834 perm[i] = GEN_INT (elt);
29835 }
29836
29837 /* Expand to a permute, swapping the inputs and using the
29838 adjusted selector. */
29839 if (!REG_P (op0))
29840 op0 = force_reg (V16QImode, op0);
29841 if (!REG_P (op1))
29842 op1 = force_reg (V16QImode, op1);
29843
29844 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
29845 constv = force_reg (V16QImode, constv);
29846 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
29847 UNSPEC_VPERM);
29848 if (!REG_P (target))
29849 {
29850 rtx tmp = gen_reg_rtx (V16QImode);
29851 emit_move_insn (tmp, unspec);
29852 unspec = tmp;
29853 }
29854
29855 emit_move_insn (target, unspec);
29856 }
29857
29858 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
29859 permute control vector. But here it's not a constant, so we must
29860 generate a vector NAND or NOR to do the adjustment. */
29861
29862 void
29863 altivec_expand_vec_perm_le (rtx operands[4])
29864 {
29865 rtx notx, iorx, unspec;
29866 rtx target = operands[0];
29867 rtx op0 = operands[1];
29868 rtx op1 = operands[2];
29869 rtx sel = operands[3];
29870 rtx tmp = target;
29871 rtx norreg = gen_reg_rtx (V16QImode);
29872 machine_mode mode = GET_MODE (target);
29873
29874 /* Get everything in regs so the pattern matches. */
29875 if (!REG_P (op0))
29876 op0 = force_reg (mode, op0);
29877 if (!REG_P (op1))
29878 op1 = force_reg (mode, op1);
29879 if (!REG_P (sel))
29880 sel = force_reg (V16QImode, sel);
29881 if (!REG_P (target))
29882 tmp = gen_reg_rtx (mode);
29883
29884 if (TARGET_P9_VECTOR)
29885 {
29886 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
29887 UNSPEC_VPERMR);
29888 }
29889 else
29890 {
29891 /* Invert the selector with a VNAND if available, else a VNOR.
29892 The VNAND is preferred for future fusion opportunities. */
29893 notx = gen_rtx_NOT (V16QImode, sel);
29894 iorx = (TARGET_P8_VECTOR
29895 ? gen_rtx_IOR (V16QImode, notx, notx)
29896 : gen_rtx_AND (V16QImode, notx, notx));
29897 emit_insn (gen_rtx_SET (norreg, iorx));
29898
29899 /* Permute with operands reversed and adjusted selector. */
29900 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
29901 UNSPEC_VPERM);
29902 }
29903
29904 /* Copy into target, possibly by way of a register. */
29905 if (!REG_P (target))
29906 {
29907 emit_move_insn (tmp, unspec);
29908 unspec = tmp;
29909 }
29910
29911 emit_move_insn (target, unspec);
29912 }
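
/* For reference, complementing the selector suffices because vperm only
   reads the low five bits of each selector byte, and for 0 <= e <= 31
   we have 31 - e == (~e & 31): 31 is 0b11111, so the subtraction never
   borrows and reduces to a bitwise complement of those bits.  */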
29913
29914 /* Expand an Altivec constant permutation. Return true if we match
29915 an efficient implementation; false to fall back to VPERM.
29916
29917 OP0 and OP1 are the input vectors and TARGET is the output vector.
29918 SEL specifies the constant permutation vector. */
29919
29920 static bool
29921 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
29922 const vec_perm_indices &sel)
29923 {
29924 struct altivec_perm_insn {
29925 HOST_WIDE_INT mask;
29926 enum insn_code impl;
29927 unsigned char perm[16];
29928 };
29929 static const struct altivec_perm_insn patterns[] = {
29930 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
29931 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
29932 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
29933 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
29934 { OPTION_MASK_ALTIVEC,
29935 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
29936 : CODE_FOR_altivec_vmrglb_direct),
29937 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
29938 { OPTION_MASK_ALTIVEC,
29939 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
29940 : CODE_FOR_altivec_vmrglh_direct),
29941 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
29942 { OPTION_MASK_ALTIVEC,
29943 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
29944 : CODE_FOR_altivec_vmrglw_direct),
29945 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
29946 { OPTION_MASK_ALTIVEC,
29947 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
29948 : CODE_FOR_altivec_vmrghb_direct),
29949 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
29950 { OPTION_MASK_ALTIVEC,
29951 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
29952 : CODE_FOR_altivec_vmrghh_direct),
29953 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
29954 { OPTION_MASK_ALTIVEC,
29955 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
29956 : CODE_FOR_altivec_vmrghw_direct),
29957 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
29958 { OPTION_MASK_P8_VECTOR,
29959 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
29960 : CODE_FOR_p8_vmrgow_v4sf_direct),
29961 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
29962 { OPTION_MASK_P8_VECTOR,
29963 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
29964 : CODE_FOR_p8_vmrgew_v4sf_direct),
29965 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
29966 };
29967
29968 unsigned int i, j, elt, which;
29969 unsigned char perm[16];
29970 rtx x;
29971 bool one_vec;
29972
29973 /* Unpack the constant selector. */
29974 for (i = which = 0; i < 16; ++i)
29975 {
29976 elt = sel[i] & 31;
29977 which |= (elt < 16 ? 1 : 2);
29978 perm[i] = elt;
29979 }
29980
29981 /* Simplify the constant selector based on operands. */
29982 switch (which)
29983 {
29984 default:
29985 gcc_unreachable ();
29986
29987 case 3:
29988 one_vec = false;
29989 if (!rtx_equal_p (op0, op1))
29990 break;
29991 /* FALLTHRU */
29992
29993 case 2:
29994 for (i = 0; i < 16; ++i)
29995 perm[i] &= 15;
29996 op0 = op1;
29997 one_vec = true;
29998 break;
29999
30000 case 1:
30001 op1 = op0;
30002 one_vec = true;
30003 break;
30004 }
30005
30006 /* Look for splat patterns. */
30007 if (one_vec)
30008 {
30009 elt = perm[0];
30010
30011 for (i = 0; i < 16; ++i)
30012 if (perm[i] != elt)
30013 break;
30014 if (i == 16)
30015 {
30016 if (!BYTES_BIG_ENDIAN)
30017 elt = 15 - elt;
30018 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
30019 return true;
30020 }
30021
30022 if (elt % 2 == 0)
30023 {
30024 for (i = 0; i < 16; i += 2)
30025 if (perm[i] != elt || perm[i + 1] != elt + 1)
30026 break;
30027 if (i == 16)
30028 {
30029 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
30030 x = gen_reg_rtx (V8HImode);
30031 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
30032 GEN_INT (field)));
30033 emit_move_insn (target, gen_lowpart (V16QImode, x));
30034 return true;
30035 }
30036 }
30037
30038 if (elt % 4 == 0)
30039 {
30040 for (i = 0; i < 16; i += 4)
30041 if (perm[i] != elt
30042 || perm[i + 1] != elt + 1
30043 || perm[i + 2] != elt + 2
30044 || perm[i + 3] != elt + 3)
30045 break;
30046 if (i == 16)
30047 {
30048 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
30049 x = gen_reg_rtx (V4SImode);
30050 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
30051 GEN_INT (field)));
30052 emit_move_insn (target, gen_lowpart (V16QImode, x));
30053 return true;
30054 }
30055 }
30056 }
30057
30058 /* Look for merge and pack patterns. */
30059 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
30060 {
30061 bool swapped;
30062
30063 if ((patterns[j].mask & rs6000_isa_flags) == 0)
30064 continue;
30065
30066 elt = patterns[j].perm[0];
30067 if (perm[0] == elt)
30068 swapped = false;
30069 else if (perm[0] == elt + 16)
30070 swapped = true;
30071 else
30072 continue;
30073 for (i = 1; i < 16; ++i)
30074 {
30075 elt = patterns[j].perm[i];
30076 if (swapped)
30077 elt = (elt >= 16 ? elt - 16 : elt + 16);
30078 else if (one_vec && elt >= 16)
30079 elt -= 16;
30080 if (perm[i] != elt)
30081 break;
30082 }
30083 if (i == 16)
30084 {
30085 enum insn_code icode = patterns[j].impl;
30086 machine_mode omode = insn_data[icode].operand[0].mode;
30087 machine_mode imode = insn_data[icode].operand[1].mode;
30088
30089 /* For little-endian, don't use vpkuwum and vpkuhum if the
30090 underlying vector type is not V4SI and V8HI, respectively.
30091 For example, using vpkuwum with a V8HI picks up the even
30092 halfwords in BE numbering, when what we need are the even
30093 halfwords in LE numbering (the odd BE ones).  */
30094 if (!BYTES_BIG_ENDIAN
30095 && icode == CODE_FOR_altivec_vpkuwum_direct
30096 && ((REG_P (op0)
30097 && GET_MODE (op0) != V4SImode)
30098 || (SUBREG_P (op0)
30099 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
30100 continue;
30101 if (!BYTES_BIG_ENDIAN
30102 && icode == CODE_FOR_altivec_vpkuhum_direct
30103 && ((REG_P (op0)
30104 && GET_MODE (op0) != V8HImode)
30105 || (SUBREG_P (op0)
30106 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
30107 continue;
30108
30109 /* For little-endian, the two input operands must be swapped
30110 (or swapped back) to ensure proper right-to-left numbering
30111 from 0 to 2N-1. */
30112 if (swapped ^ !BYTES_BIG_ENDIAN)
30113 std::swap (op0, op1);
30114 if (imode != V16QImode)
30115 {
30116 op0 = gen_lowpart (imode, op0);
30117 op1 = gen_lowpart (imode, op1);
30118 }
30119 if (omode == V16QImode)
30120 x = target;
30121 else
30122 x = gen_reg_rtx (omode);
30123 emit_insn (GEN_FCN (icode) (x, op0, op1));
30124 if (omode != V16QImode)
30125 emit_move_insn (target, gen_lowpart (V16QImode, x));
30126 return true;
30127 }
30128 }
30129
30130 if (!BYTES_BIG_ENDIAN)
30131 {
30132 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
30133 return true;
30134 }
30135
30136 return false;
30137 }
30138
30139 /* Expand a VSX Permute Doubleword constant permutation.
30140 Return true if we match an efficient implementation. */
30141
30142 static bool
30143 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
30144 unsigned char perm0, unsigned char perm1)
30145 {
30146 rtx x;
30147
30148 /* If both selectors come from the same operand, fold to single op. */
30149 if ((perm0 & 2) == (perm1 & 2))
30150 {
30151 if (perm0 & 2)
30152 op0 = op1;
30153 else
30154 op1 = op0;
30155 }
30156 /* If both operands are equal, fold to simpler permutation. */
30157 if (rtx_equal_p (op0, op1))
30158 {
30159 perm0 = perm0 & 1;
30160 perm1 = (perm1 & 1) + 2;
30161 }
30162 /* If the first selector comes from the second operand, swap. */
30163 else if (perm0 & 2)
30164 {
30165 if (perm1 & 2)
30166 return false;
30167 perm0 -= 2;
30168 perm1 += 2;
30169 std::swap (op0, op1);
30170 }
30171 /* If the second selector does not come from the second operand, fail. */
30172 else if ((perm1 & 2) == 0)
30173 return false;
30174
30175 /* Success! */
30176 if (target != NULL)
30177 {
30178 machine_mode vmode, dmode;
30179 rtvec v;
30180
30181 vmode = GET_MODE (target);
30182 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
30183 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
30184 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
30185 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
30186 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
30187 emit_insn (gen_rtx_SET (target, x));
30188 }
30189 return true;
30190 }
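
/* For reference, PERM0 and PERM1 above are two-bit selectors into the
   doubleword concatenation {op0[0], op0[1], op1[0], op1[1]}: bit 1
   chooses the operand and bit 0 the element.  E.g. perm0 = 1 and
   perm1 = 2 select {op0[1], op1[0]}, which the canonicalization above
   emits as a single VEC_SELECT of a VEC_CONCAT (an xxpermdi).  */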
30191
30192 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
30193
30194 static bool
30195 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
30196 rtx op1, const vec_perm_indices &sel)
30197 {
30198 bool testing_p = !target;
30199
30200 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
30201 if (TARGET_ALTIVEC && testing_p)
30202 return true;
30203
30204 /* Check for ps_merge* or xxpermdi insns. */
30205 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
30206 {
30207 if (testing_p)
30208 {
30209 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
30210 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
30211 }
30212 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
30213 return true;
30214 }
30215
30216 if (TARGET_ALTIVEC)
30217 {
30218 /* Force the target-independent code to lower to V16QImode. */
30219 if (vmode != V16QImode)
30220 return false;
30221 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
30222 return true;
30223 }
30224
30225 return false;
30226 }
30227
30228 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
30229 OP0 and OP1 are the input vectors and TARGET is the output vector.
30230 PERM specifies the constant permutation vector. */
30231
30232 static void
30233 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
30234 machine_mode vmode, const vec_perm_builder &perm)
30235 {
30236 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
30237 if (x != target)
30238 emit_move_insn (target, x);
30239 }
30240
30241 /* Expand an extract even operation. */
30242
30243 void
30244 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
30245 {
30246 machine_mode vmode = GET_MODE (target);
30247 unsigned i, nelt = GET_MODE_NUNITS (vmode);
30248 vec_perm_builder perm (nelt, nelt, 1);
30249
30250 for (i = 0; i < nelt; i++)
30251 perm.quick_push (i * 2);
30252
30253 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
30254 }
30255
30256 /* Expand a vector interleave operation. */
30257
30258 void
30259 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
30260 {
30261 machine_mode vmode = GET_MODE (target);
30262 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
30263 vec_perm_builder perm (nelt, nelt, 1);
30264
30265 high = (highp ? 0 : nelt / 2);
30266 for (i = 0; i < nelt / 2; i++)
30267 {
30268 perm.quick_push (i + high);
30269 perm.quick_push (i + nelt + high);
30270 }
30271
30272 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
30273 }
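
/* For reference, with nelt = 4 (e.g. V4SI) the builders above produce
   selectors into the op0:op1 concatenation of {0, 2, 4, 6} for
   extract-even, {0, 4, 1, 5} for interleave high, and {2, 6, 3, 7} for
   interleave low.  */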
30274
30275 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT.  */
30276 void
30277 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
30278 {
30279 HOST_WIDE_INT hwi_scale (scale);
30280 REAL_VALUE_TYPE r_pow;
30281 rtvec v = rtvec_alloc (2);
30282 rtx elt;
30283 rtx scale_vec = gen_reg_rtx (V2DFmode);
30284 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
30285 elt = const_double_from_real_value (r_pow, DFmode);
30286 RTVEC_ELT (v, 0) = elt;
30287 RTVEC_ELT (v, 1) = elt;
30288 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
30289 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
30290 }
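
/* For example, rs6000_scale_v2df (tgt, src, 3) builds the constant
   vector {8.0, 8.0} and emits a single V2DF multiply, scaling both
   elements of SRC by 2**3.  */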
30291
30292 /* Return an RTX representing where to find the function value of a
30293 function returning MODE. */
30294 static rtx
30295 rs6000_complex_function_value (machine_mode mode)
30296 {
30297 unsigned int regno;
30298 rtx r1, r2;
30299 machine_mode inner = GET_MODE_INNER (mode);
30300 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
30301
30302 if (TARGET_FLOAT128_TYPE
30303 && (mode == KCmode
30304 || (mode == TCmode && TARGET_IEEEQUAD)))
30305 regno = ALTIVEC_ARG_RETURN;
30306
30307 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
30308 regno = FP_ARG_RETURN;
30309
30310 else
30311 {
30312 regno = GP_ARG_RETURN;
30313
30314 /* 32-bit is OK since it'll go in r3/r4. */
30315 if (TARGET_32BIT && inner_bytes >= 4)
30316 return gen_rtx_REG (mode, regno);
30317 }
30318
30319 if (inner_bytes >= 8)
30320 return gen_rtx_REG (mode, regno);
30321
30322 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
30323 const0_rtx);
30324 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
30325 GEN_INT (inner_bytes));
30326 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
30327 }
30328
30329 /* Return an rtx describing a return value of MODE as a PARALLEL
30330 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
30331 stride REG_STRIDE. */
30332
30333 static rtx
30334 rs6000_parallel_return (machine_mode mode,
30335 int n_elts, machine_mode elt_mode,
30336 unsigned int regno, unsigned int reg_stride)
30337 {
30338 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
30339
30340 int i;
30341 for (i = 0; i < n_elts; i++)
30342 {
30343 rtx r = gen_rtx_REG (elt_mode, regno);
30344 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
30345 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
30346 regno += reg_stride;
30347 }
30348
30349 return par;
30350 }
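
/* For example (schematically; register names stand in for the actual
   hard register numbers), a homogeneous aggregate of four floats
   returned in fp1..fp4 yields

     (parallel [(expr_list (reg:SF fp1) (const_int 0))
                (expr_list (reg:SF fp2) (const_int 4))
                (expr_list (reg:SF fp3) (const_int 8))
                (expr_list (reg:SF fp4) (const_int 12))])

   where each offset is the element's byte position within the returned
   value.  */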
30351
30352 /* Target hook for TARGET_FUNCTION_VALUE.
30353
30354 An integer value is in r3 and a floating-point value is in fp1,
30355 unless -msoft-float. */
30356
30357 static rtx
30358 rs6000_function_value (const_tree valtype,
30359 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
30360 bool outgoing ATTRIBUTE_UNUSED)
30361 {
30362 machine_mode mode;
30363 unsigned int regno;
30364 machine_mode elt_mode;
30365 int n_elts;
30366
30367 /* Special handling for structs in darwin64. */
30368 if (TARGET_MACHO
30369 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
30370 {
30371 CUMULATIVE_ARGS valcum;
30372 rtx valret;
30373
30374 valcum.words = 0;
30375 valcum.fregno = FP_ARG_MIN_REG;
30376 valcum.vregno = ALTIVEC_ARG_MIN_REG;
30377 /* Do a trial code generation as if this were going to be passed as
30378 an argument; if any part goes in memory, we return NULL. */
30379 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
30380 if (valret)
30381 return valret;
30382 /* Otherwise fall through to standard ABI rules. */
30383 }
30384
30385 mode = TYPE_MODE (valtype);
30386
30387 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
30388 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
30389 {
30390 int first_reg, n_regs;
30391
30392 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
30393 {
30394 /* _Decimal128 must use even/odd register pairs. */
30395 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
30396 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
30397 }
30398 else
30399 {
30400 first_reg = ALTIVEC_ARG_RETURN;
30401 n_regs = 1;
30402 }
30403
30404 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
30405 }
30406
30407 /* Some return value types need to be split for the 32-bit ABI with -mpowerpc64.  */
30408 if (TARGET_32BIT && TARGET_POWERPC64)
30409 switch (mode)
30410 {
30411 default:
30412 break;
30413 case E_DImode:
30414 case E_SCmode:
30415 case E_DCmode:
30416 case E_TCmode:
30417 int count = GET_MODE_SIZE (mode) / 4;
30418 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
30419 }
30420
30421 if ((INTEGRAL_TYPE_P (valtype)
30422 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
30423 || POINTER_TYPE_P (valtype))
30424 mode = TARGET_32BIT ? SImode : DImode;
30425
30426 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
30427 /* _Decimal128 must use an even/odd register pair. */
30428 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
30429 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
30430 && !FLOAT128_VECTOR_P (mode))
30431 regno = FP_ARG_RETURN;
30432 else if (TREE_CODE (valtype) == COMPLEX_TYPE
30433 && targetm.calls.split_complex_arg)
30434 return rs6000_complex_function_value (mode);
30435 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
30436 return register is used in both cases, and we won't see V2DImode/V2DFmode
30437 for pure altivec, combine the two cases. */
30438 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
30439 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
30440 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
30441 regno = ALTIVEC_ARG_RETURN;
30442 else
30443 regno = GP_ARG_RETURN;
30444
30445 return gen_rtx_REG (mode, regno);
30446 }
30447
30448 /* Define how to find the value returned by a library function
30449 assuming the value has mode MODE. */
30450 rtx
30451 rs6000_libcall_value (machine_mode mode)
30452 {
30453 unsigned int regno;
30454
30455 /* A long long return value needs to be split for the 32-bit ABI with -mpowerpc64.  */
30456 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
30457 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
30458
30459 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
30460 /* _Decimal128 must use an even/odd register pair. */
30461 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
30462 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
30463 regno = FP_ARG_RETURN;
30464 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
30465 return register is used in both cases, and we won't see V2DImode/V2DFmode
30466 for pure altivec, combine the two cases. */
30467 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
30468 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
30469 regno = ALTIVEC_ARG_RETURN;
30470 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
30471 return rs6000_complex_function_value (mode);
30472 else
30473 regno = GP_ARG_RETURN;
30474
30475 return gen_rtx_REG (mode, regno);
30476 }
30477
30478 /* Compute register pressure classes.  We implement the target hook to avoid
30479 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
30480 lead to incorrect estimates of the number of available registers and
30481 therefore to increased register pressure and spilling.  */
30482 static int
30483 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
30484 {
30485 int n;
30486
30487 n = 0;
30488 pressure_classes[n++] = GENERAL_REGS;
30489 if (TARGET_VSX)
30490 pressure_classes[n++] = VSX_REGS;
30491 else
30492 {
30493 if (TARGET_ALTIVEC)
30494 pressure_classes[n++] = ALTIVEC_REGS;
30495 if (TARGET_HARD_FLOAT)
30496 pressure_classes[n++] = FLOAT_REGS;
30497 }
30498 pressure_classes[n++] = CR_REGS;
30499 pressure_classes[n++] = SPECIAL_REGS;
30500
30501 return n;
30502 }
30503
30504 /* Given FROM and TO register numbers, say whether this elimination is allowed.
30505 Frame pointer elimination is automatically handled.
30506
30507 For the RS/6000, if frame pointer elimination is being done, we would like
30508 to convert ap into fp, not sp.
30509
30510 We need r30 if -mminimal-toc was specified, and there are constant pool
30511 references. */
30512
30513 static bool
30514 rs6000_can_eliminate (const int from, const int to)
30515 {
30516 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
30517 ? ! frame_pointer_needed
30518 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
30519 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
30520 || constant_pool_empty_p ()
30521 : true);
30522 }
30523
30524 /* Define the offset between two registers, FROM to be eliminated and its
30525 replacement TO, at the start of a routine. */
30526 HOST_WIDE_INT
30527 rs6000_initial_elimination_offset (int from, int to)
30528 {
30529 rs6000_stack_t *info = rs6000_stack_info ();
30530 HOST_WIDE_INT offset;
30531
30532 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
30533 offset = info->push_p ? 0 : -info->total_size;
30534 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
30535 {
30536 offset = info->push_p ? 0 : -info->total_size;
30537 if (FRAME_GROWS_DOWNWARD)
30538 offset += info->fixed_size + info->vars_size + info->parm_size;
30539 }
30540 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
30541 offset = FRAME_GROWS_DOWNWARD
30542 ? info->fixed_size + info->vars_size + info->parm_size
30543 : 0;
30544 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
30545 offset = info->total_size;
30546 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
30547 offset = info->push_p ? info->total_size : 0;
30548 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
30549 offset = 0;
30550 else
30551 gcc_unreachable ();
30552
30553 return offset;
30554 }
30555
30556 /* Fill in the sizes of the registers used by the unwinder.  */
30557
30558 static void
30559 rs6000_init_dwarf_reg_sizes_extra (tree address)
30560 {
30561 if (TARGET_MACHO && ! TARGET_ALTIVEC)
30562 {
30563 int i;
30564 machine_mode mode = TYPE_MODE (char_type_node);
30565 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
30566 rtx mem = gen_rtx_MEM (BLKmode, addr);
30567 rtx value = gen_int_mode (16, mode);
30568
30569 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
30570 The unwinder still needs to know the size of Altivec registers. */
30571
30572 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
30573 {
30574 int column = DWARF_REG_TO_UNWIND_COLUMN
30575 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
30576 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
30577
30578 emit_move_insn (adjust_address (mem, mode, offset), value);
30579 }
30580 }
30581 }
30582
30583 /* Map internal gcc register numbers to debug format register numbers.
30584 FORMAT specifies the type of debug register number to use:
30585 0 -- debug information, except for frame-related sections
30586 1 -- DWARF .debug_frame section
30587 2 -- DWARF .eh_frame section */
30588
30589 unsigned int
30590 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
30591 {
30592 /* On some platforms, we use the standard DWARF register
30593 numbering for .debug_info and .debug_frame. */
30594 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
30595 {
30596 #ifdef RS6000_USE_DWARF_NUMBERING
30597 if (regno <= 31)
30598 return regno;
30599 if (FP_REGNO_P (regno))
30600 return regno - FIRST_FPR_REGNO + 32;
30601 if (ALTIVEC_REGNO_P (regno))
30602 return regno - FIRST_ALTIVEC_REGNO + 1124;
30603 if (regno == LR_REGNO)
30604 return 108;
30605 if (regno == CTR_REGNO)
30606 return 109;
30607 if (regno == CA_REGNO)
30608 return 101; /* XER */
30609 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
30610 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
30611 The actual code emitted saves the whole of CR, so we map CR2_REGNO
30612 to the DWARF reg for CR. */
30613 if (format == 1 && regno == CR2_REGNO)
30614 return 64;
30615 if (CR_REGNO_P (regno))
30616 return regno - CR0_REGNO + 86;
30617 if (regno == VRSAVE_REGNO)
30618 return 356;
30619 if (regno == VSCR_REGNO)
30620 return 67;
30621
30622 /* These do not make much sense. */
30623 if (regno == FRAME_POINTER_REGNUM)
30624 return 111;
30625 if (regno == ARG_POINTER_REGNUM)
30626 return 67;
30627 if (regno == 64)
30628 return 100;
30629
30630 gcc_unreachable ();
30631 #endif
30632 }
30633
30634 /* For non-DWARF debug information, and also for .eh_frame, we use the
30635 GCC 7 (and before) internal register numbering, so translate the
30636 regnos to those numbers.  */
30637 if (regno <= 31)
30638 return regno;
30639 if (FP_REGNO_P (regno))
30640 return regno - FIRST_FPR_REGNO + 32;
30641 if (ALTIVEC_REGNO_P (regno))
30642 return regno - FIRST_ALTIVEC_REGNO + 77;
30643 if (regno == LR_REGNO)
30644 return 65;
30645 if (regno == CTR_REGNO)
30646 return 66;
30647 if (regno == CA_REGNO)
30648 return 76; /* XER */
30649 if (CR_REGNO_P (regno))
30650 return regno - CR0_REGNO + 68;
30651 if (regno == VRSAVE_REGNO)
30652 return 109;
30653 if (regno == VSCR_REGNO)
30654 return 110;
30655
30656 if (regno == FRAME_POINTER_REGNUM)
30657 return 111;
30658 if (regno == ARG_POINTER_REGNUM)
30659 return 67;
30660 if (regno == 64)
30661 return 64;
30662
30663 gcc_unreachable ();
30664 }
30665
30666 /* Target hook for eh_return_filter_mode.  */
30667 static scalar_int_mode
30668 rs6000_eh_return_filter_mode (void)
30669 {
30670 return TARGET_32BIT ? SImode : word_mode;
30671 }
30672
30673 /* Target hook for translate_mode_attribute. */
30674 static machine_mode
30675 rs6000_translate_mode_attribute (machine_mode mode)
30676 {
30677 if ((FLOAT128_IEEE_P (mode)
30678 && ieee128_float_type_node == long_double_type_node)
30679 || (FLOAT128_IBM_P (mode)
30680 && ibm128_float_type_node == long_double_type_node))
30681 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
30682 return mode;
30683 }
30684
30685 /* Target hook for scalar_mode_supported_p. */
30686 static bool
30687 rs6000_scalar_mode_supported_p (scalar_mode mode)
30688 {
30689 /* -m32 does not support TImode. This is the default, from
30690 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
30691 same ABI as for -m32. But default_scalar_mode_supported_p allows
30692 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
30693 for -mpowerpc64. */
30694 if (TARGET_32BIT && mode == TImode)
30695 return false;
30696
30697 if (DECIMAL_FLOAT_MODE_P (mode))
30698 return default_decimal_float_supported_p ();
30699 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
30700 return true;
30701 else
30702 return default_scalar_mode_supported_p (mode);
30703 }
30704
30705 /* Target hook for vector_mode_supported_p. */
30706 static bool
30707 rs6000_vector_mode_supported_p (machine_mode mode)
30708 {
30709 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
30710 128-bit, the compiler might try to widen IEEE 128-bit to IBM
30711 double-double. */
30712 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
30713 return true;
30715 else
30716 return false;
30717 }
30718
30719 /* Target hook for floatn_mode. */
30720 static opt_scalar_float_mode
30721 rs6000_floatn_mode (int n, bool extended)
30722 {
30723 if (extended)
30724 {
30725 switch (n)
30726 {
30727 case 32:
30728 return DFmode;
30729
30730 case 64:
30731 if (TARGET_FLOAT128_TYPE)
30732 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
30733 else
30734 return opt_scalar_float_mode ();
30735
30736 case 128:
30737 return opt_scalar_float_mode ();
30738
30739 default:
30740 /* Those are the only valid _FloatNx types. */
30741 gcc_unreachable ();
30742 }
30743 }
30744 else
30745 {
30746 switch (n)
30747 {
30748 case 32:
30749 return SFmode;
30750
30751 case 64:
30752 return DFmode;
30753
30754 case 128:
30755 if (TARGET_FLOAT128_TYPE)
30756 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
30757 else
30758 return opt_scalar_float_mode ();
30759
30760 default:
30761 return opt_scalar_float_mode ();
30762 }
30763 }
30765 }
30766
30767 /* Target hook for c_mode_for_suffix. */
30768 static machine_mode
30769 rs6000_c_mode_for_suffix (char suffix)
30770 {
30771 if (TARGET_FLOAT128_TYPE)
30772 {
30773 if (suffix == 'q' || suffix == 'Q')
30774 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
30775
30776 /* At the moment, we are not defining a suffix for IBM extended double.
30777 If/when the default for -mabi=ieeelongdouble is changed, and we want
30778 to support __ibm128 constants in legacy library code, we may need to
30779 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
30780 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
30781 __float80 constants. */
30782 }
30783
30784 return VOIDmode;
30785 }
30786
30787 /* Target hook for invalid_arg_for_unprototyped_fn. */
30788 static const char *
30789 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
30790 {
30791 return (!rs6000_darwin64_abi
30792 && typelist == 0
30793 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
30794 && (funcdecl == NULL_TREE
30795 || (TREE_CODE (funcdecl) == FUNCTION_DECL
30796 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
30797 ? N_("AltiVec argument passed to unprototyped function")
30798 : NULL;
30799 }
30800
30801 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
30802 setup by using __stack_chk_fail_local hidden function instead of
30803 calling __stack_chk_fail directly. Otherwise it is better to call
30804 __stack_chk_fail directly. */
30805
30806 static tree ATTRIBUTE_UNUSED
30807 rs6000_stack_protect_fail (void)
30808 {
30809 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
30810 ? default_hidden_stack_protect_fail ()
30811 : default_external_stack_protect_fail ();
30812 }
30813
30814 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30815
30816 #if TARGET_ELF
30817 static unsigned HOST_WIDE_INT
30818 rs6000_asan_shadow_offset (void)
30819 {
30820 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
30821 }
30822 #endif
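
/* For reference, AddressSanitizer computes shadow = (addr >> 3) + offset,
   so the value above places the shadow region at 1 << 41 for 64-bit code
   and at 1 << 29 for 32-bit code.  */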
30823 \f
30824 /* Mask options that we want to support inside of attribute((target)) and
30825 #pragma GCC target operations. Note, we do not include things like
30826 64/32-bit, endianness, hard/soft floating point, etc. that would have
30827 different calling sequences. */
30828
30829 struct rs6000_opt_mask {
30830 const char *name; /* option name */
30831 HOST_WIDE_INT mask; /* mask to set */
30832 bool invert; /* invert sense of mask */
30833 bool valid_target; /* option is a target option */
30834 };
30835
30836 static struct rs6000_opt_mask const rs6000_opt_masks[] =
30837 {
30838 { "altivec", OPTION_MASK_ALTIVEC, false, true },
30839 { "cmpb", OPTION_MASK_CMPB, false, true },
30840 { "crypto", OPTION_MASK_CRYPTO, false, true },
30841 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
30842 { "dlmzb", OPTION_MASK_DLMZB, false, true },
30843 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
30844 false, true },
30845 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
30846 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
30847 { "fprnd", OPTION_MASK_FPRND, false, true },
30848 { "future", OPTION_MASK_FUTURE, false, true },
30849 { "hard-dfp", OPTION_MASK_DFP, false, true },
30850 { "htm", OPTION_MASK_HTM, false, true },
30851 { "isel", OPTION_MASK_ISEL, false, true },
30852 { "mfcrf", OPTION_MASK_MFCRF, false, true },
30853 { "mfpgpr", 0, false, true },
30854 { "modulo", OPTION_MASK_MODULO, false, true },
30855 { "mulhw", OPTION_MASK_MULHW, false, true },
30856 { "multiple", OPTION_MASK_MULTIPLE, false, true },
30857 { "pcrel", OPTION_MASK_PCREL, false, true },
30858 { "popcntb", OPTION_MASK_POPCNTB, false, true },
30859 { "popcntd", OPTION_MASK_POPCNTD, false, true },
30860 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
30861 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
30862 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
30863 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
30864 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
30865 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
30866 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
30867 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
30868 { "prefixed-addr", OPTION_MASK_PREFIXED_ADDR, false, true },
30869 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
30870 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
30871 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
30872 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
30873 { "string", 0, false, true },
30874 { "update", OPTION_MASK_NO_UPDATE, true, true },
30875 { "vsx", OPTION_MASK_VSX, false, true },
30876 #ifdef OPTION_MASK_64BIT
30877 #if TARGET_AIX_OS
30878 { "aix64", OPTION_MASK_64BIT, false, false },
30879 { "aix32", OPTION_MASK_64BIT, true, false },
30880 #else
30881 { "64", OPTION_MASK_64BIT, false, false },
30882 { "32", OPTION_MASK_64BIT, true, false },
30883 #endif
30884 #endif
30885 #ifdef OPTION_MASK_EABI
30886 { "eabi", OPTION_MASK_EABI, false, false },
30887 #endif
30888 #ifdef OPTION_MASK_LITTLE_ENDIAN
30889 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
30890 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
30891 #endif
30892 #ifdef OPTION_MASK_RELOCATABLE
30893 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
30894 #endif
30895 #ifdef OPTION_MASK_STRICT_ALIGN
30896 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
30897 #endif
30898 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
30899 { "string", 0, false, false },
30900 };
30901
30902 /* Builtin mask mapping for printing the flags. */
30903 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
30904 {
30905 { "altivec", RS6000_BTM_ALTIVEC, false, false },
30906 { "vsx", RS6000_BTM_VSX, false, false },
30907 { "fre", RS6000_BTM_FRE, false, false },
30908 { "fres", RS6000_BTM_FRES, false, false },
30909 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
30910 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
30911 { "popcntd", RS6000_BTM_POPCNTD, false, false },
30912 { "cell", RS6000_BTM_CELL, false, false },
30913 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
30914 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
30915 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
30916 { "crypto", RS6000_BTM_CRYPTO, false, false },
30917 { "htm", RS6000_BTM_HTM, false, false },
30918 { "hard-dfp", RS6000_BTM_DFP, false, false },
30919 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
30920 { "long-double-128", RS6000_BTM_LDBL128, false, false },
30921 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
30922 { "float128", RS6000_BTM_FLOAT128, false, false },
30923 { "float128-hw", RS6000_BTM_FLOAT128_HW, false, false },
30924 };
30925
30926 /* Option variables that we want to support inside attribute((target)) and
30927 #pragma GCC target operations. */
30928
30929 struct rs6000_opt_var {
30930 const char *name; /* option name */
30931 size_t global_offset; /* offset of the option in global_options. */
30932 size_t target_offset; /* offset of the option in target options. */
30933 };
30934
30935 static struct rs6000_opt_var const rs6000_opt_vars[] =
30936 {
30937 { "friz",
30938 offsetof (struct gcc_options, x_TARGET_FRIZ),
30939 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
30940 { "avoid-indexed-addresses",
30941 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
30942 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
30943 { "longcall",
30944 offsetof (struct gcc_options, x_rs6000_default_long_calls),
30945 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
30946 { "optimize-swaps",
30947 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
30948 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
30949 { "allow-movmisalign",
30950 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
30951 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
30952 { "sched-groups",
30953 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
30954 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
30955 { "always-hint",
30956 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
30957 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
30958 { "align-branch-targets",
30959 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
30960 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
30961 { "tls-markers",
30962 offsetof (struct gcc_options, x_tls_markers),
30963 offsetof (struct cl_target_option, x_tls_markers), },
30964 { "sched-prolog",
30965 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
30966 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
30967 { "sched-epilog",
30968 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
30969 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
30970 { "speculate-indirect-jumps",
30971 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
30972 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
30973 };
30974
30975 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
30976 parsing. Return true if there were no errors. */
30977
30978 static bool
30979 rs6000_inner_target_options (tree args, bool attr_p)
30980 {
30981 bool ret = true;
30982
30983 if (args == NULL_TREE)
30984 ;
30985
30986 else if (TREE_CODE (args) == STRING_CST)
30987 {
30988 char *p = ASTRDUP (TREE_STRING_POINTER (args));
30989 char *q;
30990
30991 while ((q = strtok (p, ",")) != NULL)
30992 {
30993 bool error_p = false;
30994 bool not_valid_p = false;
30995 const char *cpu_opt = NULL;
30996
30997 p = NULL;
30998 if (strncmp (q, "cpu=", 4) == 0)
30999 {
31000 int cpu_index = rs6000_cpu_name_lookup (q+4);
31001 if (cpu_index >= 0)
31002 rs6000_cpu_index = cpu_index;
31003 else
31004 {
31005 error_p = true;
31006 cpu_opt = q+4;
31007 }
31008 }
31009 else if (strncmp (q, "tune=", 5) == 0)
31010 {
31011 int tune_index = rs6000_cpu_name_lookup (q+5);
31012 if (tune_index >= 0)
31013 rs6000_tune_index = tune_index;
31014 else
31015 {
31016 error_p = true;
31017 cpu_opt = q+5;
31018 }
31019 }
31020 else
31021 {
31022 size_t i;
31023 bool invert = false;
31024 char *r = q;
31025
31026 error_p = true;
31027 if (strncmp (r, "no-", 3) == 0)
31028 {
31029 invert = true;
31030 r += 3;
31031 }
31032
31033 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
31034 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
31035 {
31036 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
31037
31038 if (!rs6000_opt_masks[i].valid_target)
31039 not_valid_p = true;
31040 else
31041 {
31042 error_p = false;
31043 rs6000_isa_flags_explicit |= mask;
31044
31045 /* VSX needs altivec, so -mvsx automagically sets
31046 altivec and disables -mavoid-indexed-addresses. */
31047 if (!invert)
31048 {
31049 if (mask == OPTION_MASK_VSX)
31050 {
31051 mask |= OPTION_MASK_ALTIVEC;
31052 TARGET_AVOID_XFORM = 0;
31053 }
31054 }
31055
31056 if (rs6000_opt_masks[i].invert)
31057 invert = !invert;
31058
31059 if (invert)
31060 rs6000_isa_flags &= ~mask;
31061 else
31062 rs6000_isa_flags |= mask;
31063 }
31064 break;
31065 }
31066
31067 if (error_p && !not_valid_p)
31068 {
31069 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
31070 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
31071 {
31072 size_t j = rs6000_opt_vars[i].global_offset;
31073 *((int *) ((char *)&global_options + j)) = !invert;
31074 error_p = false;
31075 not_valid_p = false;
31076 break;
31077 }
31078 }
31079 }
31080
31081 if (error_p)
31082 {
31083 const char *eprefix, *esuffix;
31084
31085 ret = false;
31086 if (attr_p)
31087 {
31088 eprefix = "__attribute__((__target__(";
31089 esuffix = ")))";
31090 }
31091 else
31092 {
31093 eprefix = "#pragma GCC target ";
31094 esuffix = "";
31095 }
31096
31097 if (cpu_opt)
31098 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
31099 q, esuffix);
31100 else if (not_valid_p)
31101 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
31102 else
31103 error ("%s%qs%s is invalid", eprefix, q, esuffix);
31104 }
31105 }
31106 }
31107
31108 else if (TREE_CODE (args) == TREE_LIST)
31109 {
31110 do
31111 {
31112 tree value = TREE_VALUE (args);
31113 if (value)
31114 {
31115 bool ret2 = rs6000_inner_target_options (value, attr_p);
31116 if (!ret2)
31117 ret = false;
31118 }
31119 args = TREE_CHAIN (args);
31120 }
31121 while (args != NULL_TREE);
31122 }
31123
31124 else
31125 {
31126 error ("attribute %<target%> argument not a string");
31127 return false;
31128 }
31129
31130 return ret;
31131 }
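
/* For example, the parser above accepts option strings such as

     #pragma GCC target ("cpu=power9,no-vsx")
     __attribute__((target("popcntd,avoid-indexed-addresses")))

   where each comma-separated token is a cpu=/tune= setting, an
   rs6000_opt_masks entry (optionally prefixed with "no-"), or an
   rs6000_opt_vars entry.  */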
31132
31133 /* Print out the target options as a list for -mdebug=target. */
31134
31135 static void
31136 rs6000_debug_target_options (tree args, const char *prefix)
31137 {
31138 if (args == NULL_TREE)
31139 fprintf (stderr, "%s<NULL>", prefix);
31140
31141 else if (TREE_CODE (args) == STRING_CST)
31142 {
31143 char *p = ASTRDUP (TREE_STRING_POINTER (args));
31144 char *q;
31145
31146 while ((q = strtok (p, ",")) != NULL)
31147 {
31148 p = NULL;
31149 fprintf (stderr, "%s\"%s\"", prefix, q);
31150 prefix = ", ";
31151 }
31152 }
31153
31154 else if (TREE_CODE (args) == TREE_LIST)
31155 {
31156 do
31157 {
31158 tree value = TREE_VALUE (args);
31159 if (value)
31160 {
31161 rs6000_debug_target_options (value, prefix);
31162 prefix = ", ";
31163 }
31164 args = TREE_CHAIN (args);
31165 }
31166 while (args != NULL_TREE);
31167 }
31168
31169 else
31170 gcc_unreachable ();
31171
31172 return;
31173 }
31174
31175 \f
31176 /* Hook to validate attribute((target("..."))). */
31177
31178 static bool
31179 rs6000_valid_attribute_p (tree fndecl,
31180 tree ARG_UNUSED (name),
31181 tree args,
31182 int flags)
31183 {
31184 struct cl_target_option cur_target;
31185 bool ret;
31186 tree old_optimize;
31187 tree new_target, new_optimize;
31188 tree func_optimize;
31189
31190 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
31191
31192 if (TARGET_DEBUG_TARGET)
31193 {
31194 tree tname = DECL_NAME (fndecl);
31195 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
31196 if (tname)
31197 fprintf (stderr, "function: %.*s\n",
31198 (int) IDENTIFIER_LENGTH (tname),
31199 IDENTIFIER_POINTER (tname));
31200 else
31201 fprintf (stderr, "function: unknown\n");
31202
31203 fprintf (stderr, "args:");
31204 rs6000_debug_target_options (args, " ");
31205 fprintf (stderr, "\n");
31206
31207 if (flags)
31208 fprintf (stderr, "flags: 0x%x\n", flags);
31209
31210 fprintf (stderr, "--------------------\n");
31211 }
31212
31213 /* attribute((target("default"))) does nothing, beyond
31214 affecting multi-versioning. */
31215 if (TREE_VALUE (args)
31216 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
31217 && TREE_CHAIN (args) == NULL_TREE
31218 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
31219 return true;
31220
31221 old_optimize = build_optimization_node (&global_options);
31222 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
31223
31224 /* If the function changed the optimization levels as well as setting target
31225 options, start with the optimizations specified. */
31226 if (func_optimize && func_optimize != old_optimize)
31227 cl_optimization_restore (&global_options,
31228 TREE_OPTIMIZATION (func_optimize));
31229
31230 /* The target attributes may also change some optimization flags, so update
31231 the optimization options if necessary. */
31232 cl_target_option_save (&cur_target, &global_options);
31233 rs6000_cpu_index = rs6000_tune_index = -1;
31234 ret = rs6000_inner_target_options (args, true);
31235
31236 /* Set up any additional state. */
31237 if (ret)
31238 {
31239 ret = rs6000_option_override_internal (false);
31240 new_target = build_target_option_node (&global_options);
31241 }
31242 else
31243 new_target = NULL;
31244
31245 new_optimize = build_optimization_node (&global_options);
31246
31247 if (!new_target)
31248 ret = false;
31249
31250 else if (fndecl)
31251 {
31252 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
31253
31254 if (old_optimize != new_optimize)
31255 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
31256 }
31257
31258 cl_target_option_restore (&global_options, &cur_target);
31259
31260 if (old_optimize != new_optimize)
31261 cl_optimization_restore (&global_options,
31262 TREE_OPTIMIZATION (old_optimize));
31263
31264 return ret;
31265 }
31266
31267 \f
31268 /* Hook to validate the current #pragma GCC target and set the state, and
31269 update the macros based on what was changed. If ARGS is NULL, then
31270 POP_TARGET is used to reset the options. */
31271
31272 bool
31273 rs6000_pragma_target_parse (tree args, tree pop_target)
31274 {
31275 tree prev_tree = build_target_option_node (&global_options);
31276 tree cur_tree;
31277 struct cl_target_option *prev_opt, *cur_opt;
31278 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
31279 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
31280
31281 if (TARGET_DEBUG_TARGET)
31282 {
31283 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
31284 fprintf (stderr, "args:");
31285 rs6000_debug_target_options (args, " ");
31286 fprintf (stderr, "\n");
31287
31288 if (pop_target)
31289 {
31290 fprintf (stderr, "pop_target:\n");
31291 debug_tree (pop_target);
31292 }
31293 else
31294 fprintf (stderr, "pop_target: <NULL>\n");
31295
31296 fprintf (stderr, "--------------------\n");
31297 }
31298
31299 if (! args)
31300 {
31301 cur_tree = ((pop_target)
31302 ? pop_target
31303 : target_option_default_node);
31304 cl_target_option_restore (&global_options,
31305 TREE_TARGET_OPTION (cur_tree));
31306 }
31307 else
31308 {
31309 rs6000_cpu_index = rs6000_tune_index = -1;
31310 if (!rs6000_inner_target_options (args, false)
31311 || !rs6000_option_override_internal (false)
31312 || (cur_tree = build_target_option_node (&global_options))
31313 == NULL_TREE)
31314 {
31315 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
31316 fprintf (stderr, "invalid pragma\n");
31317
31318 return false;
31319 }
31320 }
31321
31322 target_option_current_node = cur_tree;
31323 rs6000_activate_target_options (target_option_current_node);
31324
31325 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
31326 change the macros that are defined. */
31327 if (rs6000_target_modify_macros_ptr)
31328 {
31329 prev_opt = TREE_TARGET_OPTION (prev_tree);
31330 prev_bumask = prev_opt->x_rs6000_builtin_mask;
31331 prev_flags = prev_opt->x_rs6000_isa_flags;
31332
31333 cur_opt = TREE_TARGET_OPTION (cur_tree);
31334 cur_flags = cur_opt->x_rs6000_isa_flags;
31335 cur_bumask = cur_opt->x_rs6000_builtin_mask;
31336
31337 diff_bumask = (prev_bumask ^ cur_bumask);
31338 diff_flags = (prev_flags ^ cur_flags);
31339
31340 if ((diff_flags != 0) || (diff_bumask != 0))
31341 {
31342 /* Delete old macros. */
31343 rs6000_target_modify_macros_ptr (false,
31344 prev_flags & diff_flags,
31345 prev_bumask & diff_bumask);
31346
31347 /* Define new macros. */
31348 rs6000_target_modify_macros_ptr (true,
31349 cur_flags & diff_flags,
31350 cur_bumask & diff_bumask);
31351 }
31352 }
31353
31354 return true;
31355 }
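
/* For example, if the previous options had VSX set and the new pragma
   clears it, diff_flags has the VSX bit set.  prev_flags & diff_flags
   selects that bit for the "delete old macros" call, while
   cur_flags & diff_flags is zero for it, so the "define new macros"
   call leaves it alone.  */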
31356
31357 \f
31358 /* Remember the last target of rs6000_set_current_function. */
31359 static GTY(()) tree rs6000_previous_fndecl;
31360
31361 /* Restore target's globals from NEW_TREE and invalidate the
31362 rs6000_previous_fndecl cache. */
31363
31364 void
31365 rs6000_activate_target_options (tree new_tree)
31366 {
31367 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
31368 if (TREE_TARGET_GLOBALS (new_tree))
31369 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
31370 else if (new_tree == target_option_default_node)
31371 restore_target_globals (&default_target_globals);
31372 else
31373 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
31374 rs6000_previous_fndecl = NULL_TREE;
31375 }
31376
31377 /* Establish appropriate back-end context for processing the function
31378 FNDECL. The argument might be NULL to indicate processing at top
31379 level, outside of any function scope. */
31380 static void
31381 rs6000_set_current_function (tree fndecl)
31382 {
31383 if (TARGET_DEBUG_TARGET)
31384 {
31385 fprintf (stderr, "\n==================== rs6000_set_current_function");
31386
31387 if (fndecl)
31388 fprintf (stderr, ", fndecl %s (%p)",
31389 (DECL_NAME (fndecl)
31390 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
31391 : "<unknown>"), (void *)fndecl);
31392
31393 if (rs6000_previous_fndecl)
31394 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
31395
31396 fprintf (stderr, "\n");
31397 }
31398
31399 /* Only change the context if the function changes. This hook is called
31400 several times in the course of compiling a function, and we don't want to
31401 slow things down too much or call target_reinit when it isn't safe. */
31402 if (fndecl == rs6000_previous_fndecl)
31403 return;
31404
31405 tree old_tree;
31406 if (rs6000_previous_fndecl == NULL_TREE)
31407 old_tree = target_option_current_node;
31408 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
31409 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
31410 else
31411 old_tree = target_option_default_node;
31412
31413 tree new_tree;
31414 if (fndecl == NULL_TREE)
31415 {
31416 if (old_tree != target_option_current_node)
31417 new_tree = target_option_current_node;
31418 else
31419 new_tree = NULL_TREE;
31420 }
31421 else
31422 {
31423 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31424 if (new_tree == NULL_TREE)
31425 new_tree = target_option_default_node;
31426 }
31427
31428 if (TARGET_DEBUG_TARGET)
31429 {
31430 if (new_tree)
31431 {
31432 fprintf (stderr, "\nnew fndecl target specific options:\n");
31433 debug_tree (new_tree);
31434 }
31435
31436 if (old_tree)
31437 {
31438 fprintf (stderr, "\nold fndecl target specific options:\n");
31439 debug_tree (old_tree);
31440 }
31441
31442 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
31443 fprintf (stderr, "--------------------\n");
31444 }
31445
31446 if (new_tree && old_tree != new_tree)
31447 rs6000_activate_target_options (new_tree);
31448
31449 if (fndecl)
31450 rs6000_previous_fndecl = fndecl;
31451 }
31452
31453 \f
31454 /* Save the current options.  */
31455
31456 static void
31457 rs6000_function_specific_save (struct cl_target_option *ptr,
31458 struct gcc_options *opts)
31459 {
31460 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
31461 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
31462 }
31463
31464 /* Restore the current options.  */
31465
31466 static void
31467 rs6000_function_specific_restore (struct gcc_options *opts,
31468 struct cl_target_option *ptr)
31470 {
31471 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
31472 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
31473 (void) rs6000_option_override_internal (false);
31474 }
31475
31476 /* Print the current options.  */
31477
31478 static void
31479 rs6000_function_specific_print (FILE *file, int indent,
31480 struct cl_target_option *ptr)
31481 {
31482 rs6000_print_isa_options (file, indent, "Isa options set",
31483 ptr->x_rs6000_isa_flags);
31484
31485 rs6000_print_isa_options (file, indent, "Isa options explicit",
31486 ptr->x_rs6000_isa_flags_explicit);
31487 }
31488
31489 /* Helper function to print the current isa or misc options on a line. */
31490
31491 static void
31492 rs6000_print_options_internal (FILE *file,
31493 int indent,
31494 const char *string,
31495 HOST_WIDE_INT flags,
31496 const char *prefix,
31497 const struct rs6000_opt_mask *opts,
31498 size_t num_elements)
31499 {
31500 size_t i;
31501 size_t start_column = 0;
31502 size_t cur_column;
31503 size_t max_column = 120;
31504 size_t prefix_len = strlen (prefix);
31505 size_t comma_len = 0;
31506 const char *comma = "";
31507
31508 if (indent)
31509 start_column += fprintf (file, "%*s", indent, "");
31510
31511 if (!flags)
31512 {
fprintf (file, DEBUG_FMT_S, string, "<none>");
31514 return;
31515 }
31516
start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
31518
31519 /* Print the various mask options. */
31520 cur_column = start_column;
31521 for (i = 0; i < num_elements; i++)
31522 {
31523 bool invert = opts[i].invert;
31524 const char *name = opts[i].name;
31525 const char *no_str = "";
31526 HOST_WIDE_INT mask = opts[i].mask;
31527 size_t len = comma_len + prefix_len + strlen (name);
31528
31529 if (!invert)
31530 {
31531 if ((flags & mask) == 0)
31532 {
31533 no_str = "no-";
31534 len += sizeof ("no-") - 1;
31535 }
31536
31537 flags &= ~mask;
31538 }
31539
31540 else
31541 {
31542 if ((flags & mask) != 0)
31543 {
31544 no_str = "no-";
31545 len += sizeof ("no-") - 1;
31546 }
31547
31548 flags |= mask;
31549 }
31550
31551 cur_column += len;
31552 if (cur_column > max_column)
31553 {
fprintf (file, ", \\\n%*s", (int)start_column, "");
31555 cur_column = start_column + len;
31556 comma = "";
31557 }
31558
31559 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
31560 comma = ", ";
31561 comma_len = sizeof (", ") - 1;
31562 }
31563
31564 fputs ("\n", file);
31565 }
31566
31567 /* Helper function to print the current isa options on a line. */
31568
31569 static void
31570 rs6000_print_isa_options (FILE *file, int indent, const char *string,
31571 HOST_WIDE_INT flags)
31572 {
31573 rs6000_print_options_internal (file, indent, string, flags, "-m",
31574 &rs6000_opt_masks[0],
31575 ARRAY_SIZE (rs6000_opt_masks));
31576 }
31577
/* Helper function to print the current builtin options on a line. */

static void
31579 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
31580 HOST_WIDE_INT flags)
31581 {
31582 rs6000_print_options_internal (file, indent, string, flags, "",
31583 &rs6000_builtin_mask_names[0],
31584 ARRAY_SIZE (rs6000_builtin_mask_names));
31585 }
31586
/* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
31588 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
31589 -mupper-regs-df, etc.).
31590
31591 If the user used -mno-power8-vector, we need to turn off all of the implicit
31592 ISA 2.07 and 3.0 options that relate to the vector unit.
31593
31594 If the user used -mno-power9-vector, we need to turn off all of the implicit
31595 ISA 3.0 options that relate to the vector unit.
31596
31597 This function does not handle explicit options such as the user specifying
31598 -mdirect-move. These are handled in rs6000_option_override_internal, and
31599 the appropriate error is given if needed.
31600
31601 We return a mask of all of the implicit options that should not be enabled
31602 by default. */
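
/* For example (an illustrative sketch, not an exhaustive list):

	gcc -mcpu=power9 -mno-vsx ...

   should also turn off the implicit -mpower9-vector, -mpower8-vector and
   -mdirect-move options, while an explicit combination such as

	gcc -mno-vsx -mpower8-vector ...

   is rejected with "-mno-vsx turns off -mpower8-vector" by the loop
   below. */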
31603
31604 static HOST_WIDE_INT
31605 rs6000_disable_incompatible_switches (void)
31606 {
31607 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
31608 size_t i, j;
31609
31610 static const struct {
31611 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
31612 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
31613 const char *const name; /* name of the switch. */
31614 } flags[] = {
31615 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
31616 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
31617 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
31618 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
31619 };
31620
31621 for (i = 0; i < ARRAY_SIZE (flags); i++)
31622 {
31623 HOST_WIDE_INT no_flag = flags[i].no_flag;
31624
31625 if ((rs6000_isa_flags & no_flag) == 0
31626 && (rs6000_isa_flags_explicit & no_flag) != 0)
31627 {
31628 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
31629 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
31630 & rs6000_isa_flags
31631 & dep_flags);
31632
31633 if (set_flags)
31634 {
31635 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
31636 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
31637 {
31638 set_flags &= ~rs6000_opt_masks[j].mask;
31639 error ("%<-mno-%s%> turns off %<-m%s%>",
31640 flags[i].name,
31641 rs6000_opt_masks[j].name);
31642 }
31643
31644 gcc_assert (!set_flags);
31645 }
31646
31647 rs6000_isa_flags &= ~dep_flags;
31648 ignore_masks |= no_flag | dep_flags;
31649 }
31650 }
31651
31652 return ignore_masks;
31653 }
31654
31655 \f
31656 /* Helper function for printing the function name when debugging. */
31657
31658 static const char *
31659 get_decl_name (tree fn)
31660 {
31661 tree name;
31662
31663 if (!fn)
31664 return "<null>";
31665
31666 name = DECL_NAME (fn);
31667 if (!name)
31668 return "<no-name>";
31669
31670 return IDENTIFIER_POINTER (name);
31671 }
31672
31673 /* Return the clone id of the target we are compiling code for in a target
31674 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
31675 the priority list for the target clones (ordered from lowest to
31676 highest). */
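
/* For example (a sketch):

	__attribute__ ((target_clones ("cpu=power9", "default")))
	long foo (long a, long b) { return a + b; }

   gives the default clone priority CLONE_DEFAULT (0) and the power9 clone
   a higher priority, so the dispatcher prefers the power9 version whenever
   the running machine supports it. */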
31677
31678 static int
31679 rs6000_clone_priority (tree fndecl)
31680 {
31681 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31682 HOST_WIDE_INT isa_masks;
31683 int ret = CLONE_DEFAULT;
31684 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
31685 const char *attrs_str = NULL;
31686
31687 attrs = TREE_VALUE (TREE_VALUE (attrs));
31688 attrs_str = TREE_STRING_POINTER (attrs);
31689
31690 /* Return priority zero for default function. Return the ISA needed for the
31691 function if it is not the default. */
31692 if (strcmp (attrs_str, "default") != 0)
31693 {
31694 if (fn_opts == NULL_TREE)
31695 fn_opts = target_option_default_node;
31696
31697 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
31698 isa_masks = rs6000_isa_flags;
31699 else
31700 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
31701
31702 for (ret = CLONE_MAX - 1; ret != 0; ret--)
31703 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
31704 break;
31705 }
31706
31707 if (TARGET_DEBUG_TARGET)
31708 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
31709 get_decl_name (fndecl), ret);
31710
31711 return ret;
31712 }
31713
31714 /* This compares the priority of target features in function DECL1 and DECL2.
31715 It returns positive value if DECL1 is higher priority, negative value if
31716 DECL2 is higher priority and 0 if they are the same. Note, priorities are
31717 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
31718
31719 static int
31720 rs6000_compare_version_priority (tree decl1, tree decl2)
31721 {
31722 int priority1 = rs6000_clone_priority (decl1);
31723 int priority2 = rs6000_clone_priority (decl2);
31724 int ret = priority1 - priority2;
31725
31726 if (TARGET_DEBUG_TARGET)
31727 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
31728 get_decl_name (decl1), get_decl_name (decl2), ret);
31729
31730 return ret;
31731 }
31732
31733 /* Make a dispatcher declaration for the multi-versioned function DECL.
31734 Calls to DECL function will be replaced with calls to the dispatcher
31735 by the front-end. Returns the decl of the dispatcher function. */
31736
31737 static tree
31738 rs6000_get_function_versions_dispatcher (void *decl)
31739 {
31740 tree fn = (tree) decl;
31741 struct cgraph_node *node = NULL;
31742 struct cgraph_node *default_node = NULL;
31743 struct cgraph_function_version_info *node_v = NULL;
31744 struct cgraph_function_version_info *first_v = NULL;
31745
31746 tree dispatch_decl = NULL;
31747
31748 struct cgraph_function_version_info *default_version_info = NULL;
31749 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31750
31751 if (TARGET_DEBUG_TARGET)
31752 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
31753 get_decl_name (fn));
31754
31755 node = cgraph_node::get (fn);
31756 gcc_assert (node != NULL);
31757
31758 node_v = node->function_version ();
31759 gcc_assert (node_v != NULL);
31760
31761 if (node_v->dispatcher_resolver != NULL)
31762 return node_v->dispatcher_resolver;
31763
31764 /* Find the default version and make it the first node. */
31765 first_v = node_v;
31766 /* Go to the beginning of the chain. */
31767 while (first_v->prev != NULL)
31768 first_v = first_v->prev;
31769
31770 default_version_info = first_v;
31771 while (default_version_info != NULL)
31772 {
31773 const tree decl2 = default_version_info->this_node->decl;
31774 if (is_function_default_version (decl2))
31775 break;
31776 default_version_info = default_version_info->next;
31777 }
31778
31779 /* If there is no default node, just return NULL. */
31780 if (default_version_info == NULL)
31781 return NULL;
31782
31783 /* Make default info the first node. */
31784 if (first_v != default_version_info)
31785 {
31786 default_version_info->prev->next = default_version_info->next;
31787 if (default_version_info->next)
31788 default_version_info->next->prev = default_version_info->prev;
31789 first_v->prev = default_version_info;
31790 default_version_info->next = first_v;
31791 default_version_info->prev = NULL;
31792 }
31793
31794 default_node = default_version_info->this_node;
31795
31796 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
31797 error_at (DECL_SOURCE_LOCATION (default_node->decl),
31798 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
31799 "exports hardware capability bits");
31800 #else
31801
31802 if (targetm.has_ifunc_p ())
31803 {
31804 struct cgraph_function_version_info *it_v = NULL;
31805 struct cgraph_node *dispatcher_node = NULL;
31806 struct cgraph_function_version_info *dispatcher_version_info = NULL;
31807
31808 /* Right now, the dispatching is done via ifunc. */
31809 dispatch_decl = make_dispatcher_decl (default_node->decl);
31810
31811 dispatcher_node = cgraph_node::get_create (dispatch_decl);
31812 gcc_assert (dispatcher_node != NULL);
31813 dispatcher_node->dispatcher_function = 1;
31814 dispatcher_version_info
31815 = dispatcher_node->insert_new_function_version ();
31816 dispatcher_version_info->next = default_version_info;
31817 dispatcher_node->definition = 1;
31818
31819 /* Set the dispatcher for all the versions. */
31820 it_v = default_version_info;
31821 while (it_v != NULL)
31822 {
31823 it_v->dispatcher_resolver = dispatch_decl;
31824 it_v = it_v->next;
31825 }
31826 }
31827 else
31828 {
31829 error_at (DECL_SOURCE_LOCATION (default_node->decl),
31830 "multiversioning needs ifunc which is not supported "
31831 "on this target");
31832 }
31833 #endif
31834
31835 return dispatch_decl;
31836 }
31837
31838 /* Make the resolver function decl to dispatch the versions of a multi-
31839 versioned function, DEFAULT_DECL. Create an empty basic block in the
31840 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
31841 function. */
31842
31843 static tree
31844 make_resolver_func (const tree default_decl,
31845 const tree dispatch_decl,
31846 basic_block *empty_bb)
31847 {
31848 /* Make the resolver function static. The resolver function returns
31849 void *. */
31850 tree decl_name = clone_function_name (default_decl, "resolver");
31851 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
31852 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
31853 tree decl = build_fn_decl (resolver_name, type);
31854 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
31855
31856 DECL_NAME (decl) = decl_name;
31857 TREE_USED (decl) = 1;
31858 DECL_ARTIFICIAL (decl) = 1;
31859 DECL_IGNORED_P (decl) = 0;
31860 TREE_PUBLIC (decl) = 0;
31861 DECL_UNINLINABLE (decl) = 1;
31862
31863 /* Resolver is not external, body is generated. */
31864 DECL_EXTERNAL (decl) = 0;
31865 DECL_EXTERNAL (dispatch_decl) = 0;
31866
31867 DECL_CONTEXT (decl) = NULL_TREE;
31868 DECL_INITIAL (decl) = make_node (BLOCK);
31869 DECL_STATIC_CONSTRUCTOR (decl) = 0;
31870
31871 /* Build result decl and add to function_decl. */
31872 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
31873 DECL_CONTEXT (t) = decl;
31874 DECL_ARTIFICIAL (t) = 1;
31875 DECL_IGNORED_P (t) = 1;
31876 DECL_RESULT (decl) = t;
31877
31878 gimplify_function_tree (decl);
31879 push_cfun (DECL_STRUCT_FUNCTION (decl));
31880 *empty_bb = init_lowered_empty_function (decl, false,
31881 profile_count::uninitialized ());
31882
31883 cgraph_node::add_new_function (decl, true);
31884 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
31885
31886 pop_cfun ();
31887
31888 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
31889 DECL_ATTRIBUTES (dispatch_decl)
31890 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
31891
31892 cgraph_node::create_same_body_alias (dispatch_decl, decl);
31893
31894 return decl;
31895 }
31896
31897 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
31898 return a pointer to VERSION_DECL if we are running on a machine that
31899 supports the index CLONE_ISA hardware architecture bits. This function will
31900 be called during version dispatch to decide which function version to
31901 execute. It returns the basic block at the end, to which more conditions
31902 can be added. */
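
/* The runtime test added for a non-default clone is roughly this (a GIMPLE
   sketch; the predicate string comes from rs6000_clone_map[clone_isa].name,
   e.g. "arch_2_07" for the power8 entry):

	cond = __builtin_cpu_supports ("arch_2_07");
	if (cond != 0)
	  return <address of the power8 clone>;

   otherwise control falls through to the test for the next clone or,
   finally, to the default version. */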
31903
31904 static basic_block
31905 add_condition_to_bb (tree function_decl, tree version_decl,
31906 int clone_isa, basic_block new_bb)
31907 {
31908 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31909
31910 gcc_assert (new_bb != NULL);
gimple_seq gseq = bb_seq (new_bb);

tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31915 build_fold_addr_expr (version_decl));
31916 tree result_var = create_tmp_var (ptr_type_node);
31917 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
31918 gimple *return_stmt = gimple_build_return (result_var);
31919
31920 if (clone_isa == CLONE_DEFAULT)
31921 {
31922 gimple_seq_add_stmt (&gseq, convert_stmt);
31923 gimple_seq_add_stmt (&gseq, return_stmt);
31924 set_bb_seq (new_bb, gseq);
31925 gimple_set_bb (convert_stmt, new_bb);
31926 gimple_set_bb (return_stmt, new_bb);
31927 pop_cfun ();
31928 return new_bb;
31929 }
31930
31931 tree bool_zero = build_int_cst (bool_int_type_node, 0);
31932 tree cond_var = create_tmp_var (bool_int_type_node);
31933 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
31934 const char *arg_str = rs6000_clone_map[clone_isa].name;
31935 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31936 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31937 gimple_call_set_lhs (call_cond_stmt, cond_var);
31938
31939 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31940 gimple_set_bb (call_cond_stmt, new_bb);
31941 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31942
31943 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
31944 NULL_TREE, NULL_TREE);
31945 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31946 gimple_set_bb (if_else_stmt, new_bb);
31947 gimple_seq_add_stmt (&gseq, if_else_stmt);
31948
31949 gimple_seq_add_stmt (&gseq, convert_stmt);
31950 gimple_seq_add_stmt (&gseq, return_stmt);
31951 set_bb_seq (new_bb, gseq);
31952
31953 basic_block bb1 = new_bb;
31954 edge e12 = split_block (bb1, if_else_stmt);
31955 basic_block bb2 = e12->dest;
31956 e12->flags &= ~EDGE_FALLTHRU;
31957 e12->flags |= EDGE_TRUE_VALUE;
31958
31959 edge e23 = split_block (bb2, return_stmt);
31960 gimple_set_bb (convert_stmt, bb2);
31961 gimple_set_bb (return_stmt, bb2);
31962
31963 basic_block bb3 = e23->dest;
31964 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31965
31966 remove_edge (e23);
31967 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31968
31969 pop_cfun ();
31970 return bb3;
31971 }
31972
31973 /* This function generates the dispatch function for multi-versioned functions.
31974 DISPATCH_DECL is the function which will contain the dispatch logic.
FNDECLS_P is a vector containing the function choices for dispatch.
31976 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
31977 code is generated. */
31978
31979 static int
31980 dispatch_function_versions (tree dispatch_decl,
31981 void *fndecls_p,
31982 basic_block *empty_bb)
31983 {
31984 int ix;
31985 tree ele;
31986 vec<tree> *fndecls;
31987 tree clones[CLONE_MAX];
31988
31989 if (TARGET_DEBUG_TARGET)
31990 fputs ("dispatch_function_versions, top\n", stderr);
31991
31992 gcc_assert (dispatch_decl != NULL
31993 && fndecls_p != NULL
31994 && empty_bb != NULL);
31995
31996 /* fndecls_p is actually a vector. */
31997 fndecls = static_cast<vec<tree> *> (fndecls_p);
31998
31999 /* At least one more version other than the default. */
32000 gcc_assert (fndecls->length () >= 2);
32001
32002 /* The first version in the vector is the default decl. */
32003 memset ((void *) clones, '\0', sizeof (clones));
32004 clones[CLONE_DEFAULT] = (*fndecls)[0];
32005
/* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
on the PowerPC (on the x86_64, it is not a NOP). The builtin function
__builtin_cpu_supports ensures that the TCB fields are set up by requiring a
recent glibc. If we ever need to call __builtin_cpu_init, we would need
to insert the code here to do the call. */
32011
32012 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
32013 {
32014 int priority = rs6000_clone_priority (ele);
32015 if (!clones[priority])
32016 clones[priority] = ele;
32017 }
32018
32019 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
32020 if (clones[ix])
32021 {
32022 if (TARGET_DEBUG_TARGET)
32023 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
32024 ix, get_decl_name (clones[ix]));
32025
32026 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
32027 *empty_bb);
32028 }
32029
32030 return 0;
32031 }
32032
32033 /* Generate the dispatching code body to dispatch multi-versioned function
32034 DECL. The target hook is called to process the "target" attributes and
32035 provide the code to dispatch the right function at run-time. NODE points
32036 to the dispatcher decl whose body will be created. */
32037
32038 static tree
32039 rs6000_generate_version_dispatcher_body (void *node_p)
32040 {
32041 tree resolver;
32042 basic_block empty_bb;
32043 struct cgraph_node *node = (cgraph_node *) node_p;
32044 struct cgraph_function_version_info *ninfo = node->function_version ();
32045
32046 if (ninfo->dispatcher_resolver)
32047 return ninfo->dispatcher_resolver;
32048
32049 /* node is going to be an alias, so remove the finalized bit. */
32050 node->definition = false;
32051
32052 /* The first version in the chain corresponds to the default version. */
32053 ninfo->dispatcher_resolver = resolver
32054 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
32055
32056 if (TARGET_DEBUG_TARGET)
32057 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
32058 get_decl_name (resolver));
32059
32060 push_cfun (DECL_STRUCT_FUNCTION (resolver));
32061 auto_vec<tree, 2> fn_ver_vec;
32062
32063 for (struct cgraph_function_version_info *vinfo = ninfo->next;
32064 vinfo;
32065 vinfo = vinfo->next)
32066 {
32067 struct cgraph_node *version = vinfo->this_node;
32068 /* Check for virtual functions here again, as by this time it should
32069 have been determined if this function needs a vtable index or
32070 not. This happens for methods in derived classes that override
32071 virtual methods in base classes but are not explicitly marked as
32072 virtual. */
32073 if (DECL_VINDEX (version->decl))
32074 sorry ("Virtual function multiversioning not supported");
32075
32076 fn_ver_vec.safe_push (version->decl);
32077 }
32078
32079 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
32080 cgraph_edge::rebuild_edges ();
32081 pop_cfun ();
32082 return resolver;
32083 }
32084
32085 \f
32086 /* Hook to determine if one function can safely inline another. */
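
/* For example (a sketch):

	__attribute__ ((target ("altivec"))) static int callee (void);
	__attribute__ ((target ("vsx"))) int caller (void)
	{ return callee (); }

   here the callee's ISA flags are a subset of the caller's (-mvsx implies
   -maltivec), so inlining is allowed; with the attributes swapped it would
   be refused. */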
32087
32088 static bool
32089 rs6000_can_inline_p (tree caller, tree callee)
32090 {
32091 bool ret = false;
32092 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32093 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32094
32095 /* If callee has no option attributes, then it is ok to inline. */
32096 if (!callee_tree)
32097 ret = true;
32098
/* If caller has no option attributes but callee does, then it is not ok
to inline. */
32101 else if (!caller_tree)
32102 ret = false;
32103
32104 else
32105 {
32106 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32107 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32108
/* The callee's options must be a subset of the caller's, i.e. a vsx
function can inline an altivec function, but a non-vsx function can't
inline a vsx function. */
32112 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32113 == callee_opts->x_rs6000_isa_flags)
32114 ret = true;
32115 }
32116
32117 if (TARGET_DEBUG_TARGET)
32118 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
32119 get_decl_name (caller), get_decl_name (callee),
32120 (ret ? "can" : "cannot"));
32121
32122 return ret;
32123 }
32124 \f
/* Allocate a stack temp and fix up the address so it meets the particular
memory requirements (either offsettable or REG+REG addressing). */
32127
32128 rtx
32129 rs6000_allocate_stack_temp (machine_mode mode,
32130 bool offsettable_p,
32131 bool reg_reg_p)
32132 {
32133 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32134 rtx addr = XEXP (stack, 0);
32135 int strict_p = reload_completed;
32136
32137 if (!legitimate_indirect_address_p (addr, strict_p))
32138 {
32139 if (offsettable_p
32140 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32141 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32142
32143 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32144 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32145 }
32146
32147 return stack;
32148 }
32149
/* Given a memory reference, if it does not use reg or reg+reg addressing,
convert it to such a form to deal with memory reference instructions
like STFIWX and LDBRX that only take reg+reg addressing. */
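
/* For example (illustrative): a MEM whose address is
   (plus (reg 31) (const_int 16)) has the PLUS forced into a fresh pseudo,
   leaving (mem (reg N)), an indirect form that X-form instructions such as
   STFIWX and LDBRX accept. */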
32153
32154 rtx
32155 rs6000_force_indexed_or_indirect_mem (rtx x)
32156 {
32157 machine_mode mode = GET_MODE (x);
32158
32159 gcc_assert (MEM_P (x));
32160 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
32161 {
32162 rtx addr = XEXP (x, 0);
32163 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
32164 {
32165 rtx reg = XEXP (addr, 0);
32166 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
32167 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
32168 gcc_assert (REG_P (reg));
32169 emit_insn (gen_add3_insn (reg, reg, size_rtx));
32170 addr = reg;
32171 }
32172 else if (GET_CODE (addr) == PRE_MODIFY)
32173 {
32174 rtx reg = XEXP (addr, 0);
32175 rtx expr = XEXP (addr, 1);
32176 gcc_assert (REG_P (reg));
32177 gcc_assert (GET_CODE (expr) == PLUS);
32178 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
32179 addr = reg;
32180 }
32181
32182 x = replace_equiv_address (x, force_reg (Pmode, addr));
32183 }
32184
32185 return x;
32186 }
32187
32188 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
32189
On the RS/6000, all integer constants are acceptable, though most won't be
valid for particular insns. Only easy FP constants are acceptable. */
32192
32193 static bool
32194 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
32195 {
32196 if (TARGET_ELF && tls_referenced_p (x))
32197 return false;
32198
32199 if (CONST_DOUBLE_P (x))
32200 return easy_fp_constant (x, mode);
32201
32202 if (GET_CODE (x) == CONST_VECTOR)
32203 return easy_vector_constant (x, mode);
32204
32205 return true;
32206 }
32207
32208 \f
32209 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
32210
32211 static bool
32212 chain_already_loaded (rtx_insn *last)
32213 {
32214 for (; last != NULL; last = PREV_INSN (last))
32215 {
32216 if (NONJUMP_INSN_P (last))
32217 {
32218 rtx patt = PATTERN (last);
32219
32220 if (GET_CODE (patt) == SET)
32221 {
32222 rtx lhs = XEXP (patt, 0);
32223
32224 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
32225 return true;
32226 }
32227 }
32228 }
32229 return false;
32230 }
32231
32232 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
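
/* For a non-pcrel ELFv2 indirect call, the emitted sequence is roughly
   (a sketch; on 64-bit ELFv2 the TOC save slot is at r1+24):

	std 2,24(1)	;; save the TOC pointer in its reserved slot
	mr 12,N		;; the ABI wants the function address in r12
	mtctr 12
	bctrl
	ld 2,24(1)	;; restore the TOC pointer after the call

   the code below constructs the RTL for each of these pieces. */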
32233
32234 void
32235 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32236 {
32237 rtx func = func_desc;
32238 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
32239 rtx toc_load = NULL_RTX;
32240 rtx toc_restore = NULL_RTX;
32241 rtx func_addr;
32242 rtx abi_reg = NULL_RTX;
32243 rtx call[4];
32244 int n_call;
32245 rtx insn;
32246 bool is_pltseq_longcall;
32247
32248 if (global_tlsarg)
32249 tlsarg = global_tlsarg;
32250
32251 /* Handle longcall attributes. */
32252 is_pltseq_longcall = false;
32253 if ((INTVAL (cookie) & CALL_LONG) != 0
32254 && GET_CODE (func_desc) == SYMBOL_REF)
32255 {
32256 func = rs6000_longcall_ref (func_desc, tlsarg);
32257 if (TARGET_PLTSEQ)
32258 is_pltseq_longcall = true;
32259 }
32260
32261 /* Handle indirect calls. */
32262 if (!SYMBOL_REF_P (func)
32263 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
32264 {
32265 if (!rs6000_pcrel_p (cfun))
32266 {
32267 /* Save the TOC into its reserved slot before the call,
32268 and prepare to restore it after the call. */
32269 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
32270 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
32271 gen_rtvec (1, stack_toc_offset),
32272 UNSPEC_TOCSLOT);
32273 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
32274
32275 /* Can we optimize saving the TOC in the prologue or
32276 do we need to do it at every call? */
32277 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
32278 cfun->machine->save_toc_in_prologue = true;
32279 else
32280 {
32281 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32282 rtx stack_toc_mem = gen_frame_mem (Pmode,
32283 gen_rtx_PLUS (Pmode, stack_ptr,
32284 stack_toc_offset));
32285 MEM_VOLATILE_P (stack_toc_mem) = 1;
32286 if (is_pltseq_longcall)
32287 {
32288 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
32289 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32290 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
32291 }
32292 else
32293 emit_move_insn (stack_toc_mem, toc_reg);
32294 }
32295 }
32296
32297 if (DEFAULT_ABI == ABI_ELFv2)
32298 {
32299 /* A function pointer in the ELFv2 ABI is just a plain address, but
32300 the ABI requires it to be loaded into r12 before the call. */
32301 func_addr = gen_rtx_REG (Pmode, 12);
32302 if (!rtx_equal_p (func_addr, func))
32303 emit_move_insn (func_addr, func);
32304 abi_reg = func_addr;
32305 /* Indirect calls via CTR are strongly preferred over indirect
32306 calls via LR, so move the address there. Needed to mark
32307 this insn for linker plt sequence editing too. */
32308 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32309 if (is_pltseq_longcall)
32310 {
32311 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
32312 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32313 emit_insn (gen_rtx_SET (func_addr, mark_func));
32314 v = gen_rtvec (2, func_addr, func_desc);
32315 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32316 }
32317 else
32318 emit_move_insn (func_addr, abi_reg);
32319 }
32320 else
32321 {
32322 /* A function pointer under AIX is a pointer to a data area whose
32323 first word contains the actual address of the function, whose
32324 second word contains a pointer to its TOC, and whose third word
32325 contains a value to place in the static chain register (r11).
32326 Note that if we load the static chain, our "trampoline" need
32327 not have any executable code. */
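
/* Conceptually the descriptor looks like this (a sketch, not a type
   that is declared anywhere in GCC):

	struct aix_func_desc
	{
	  void *code;		(word 0: address of the function's code)
	  void *toc;		(word 1: the function's TOC pointer)
	  void *static_chain;	(word 2: value for r11, if any)
	};  */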
32328
32329 /* Load up address of the actual function. */
32330 func = force_reg (Pmode, func);
32331 func_addr = gen_reg_rtx (Pmode);
32332 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
32333
32334 /* Indirect calls via CTR are strongly preferred over indirect
32335 calls via LR, so move the address there. */
32336 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
32337 emit_move_insn (ctr_reg, func_addr);
32338 func_addr = ctr_reg;
32339
32340 /* Prepare to load the TOC of the called function. Note that the
32341 TOC load must happen immediately before the actual call so
32342 that unwinding the TOC registers works correctly. See the
32343 comment in frob_update_context. */
32344 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
32345 rtx func_toc_mem = gen_rtx_MEM (Pmode,
32346 gen_rtx_PLUS (Pmode, func,
32347 func_toc_offset));
32348 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
32349
32350 /* If we have a static chain, load it up. But, if the call was
32351 originally direct, the 3rd word has not been written since no
32352 trampoline has been built, so we ought not to load it, lest we
32353 override a static chain value. */
32354 if (!(GET_CODE (func_desc) == SYMBOL_REF
32355 && SYMBOL_REF_FUNCTION_P (func_desc))
32356 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
32357 && !chain_already_loaded (get_current_sequence ()->next->last))
32358 {
32359 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
32360 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
32361 rtx func_sc_mem = gen_rtx_MEM (Pmode,
32362 gen_rtx_PLUS (Pmode, func,
32363 func_sc_offset));
32364 emit_move_insn (sc_reg, func_sc_mem);
32365 abi_reg = sc_reg;
32366 }
32367 }
32368 }
32369 else
32370 {
32371 /* No TOC register needed for calls from PC-relative callers. */
32372 if (!rs6000_pcrel_p (cfun))
32373 /* Direct calls use the TOC: for local calls, the callee will
32374 assume the TOC register is set; for non-local calls, the
32375 PLT stub needs the TOC register. */
32376 abi_reg = toc_reg;
32377 func_addr = func;
32378 }
32379
32380 /* Create the call. */
32381 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32382 if (value != NULL_RTX)
32383 call[0] = gen_rtx_SET (value, call[0]);
32384 n_call = 1;
32385
32386 if (toc_load)
32387 call[n_call++] = toc_load;
32388 if (toc_restore)
32389 call[n_call++] = toc_restore;
32390
32391 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
32392
32393 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
32394 insn = emit_call_insn (insn);
32395
32396 /* Mention all registers defined by the ABI to hold information
32397 as uses in CALL_INSN_FUNCTION_USAGE. */
32398 if (abi_reg)
32399 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32400 }
32401
32402 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
32403
32404 void
32405 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32406 {
32407 rtx call[2];
32408 rtx insn;
32409
32410 gcc_assert (INTVAL (cookie) == 0);
32411
32412 if (global_tlsarg)
32413 tlsarg = global_tlsarg;
32414
32415 /* Create the call. */
32416 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
32417 if (value != NULL_RTX)
32418 call[0] = gen_rtx_SET (value, call[0]);
32419
32420 call[1] = simple_return_rtx;
32421
32422 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
32423 insn = emit_call_insn (insn);
32424
32425 /* Note use of the TOC register. */
32426 if (!rs6000_pcrel_p (cfun))
32427 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
32428 gen_rtx_REG (Pmode, TOC_REGNUM));
32429 }
32430
32431 /* Expand code to perform a call under the SYSV4 ABI. */
32432
32433 void
32434 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32435 {
32436 rtx func = func_desc;
32437 rtx func_addr;
32438 rtx call[4];
32439 rtx insn;
32440 rtx abi_reg = NULL_RTX;
32441 int n;
32442
32443 if (global_tlsarg)
32444 tlsarg = global_tlsarg;
32445
32446 /* Handle longcall attributes. */
32447 if ((INTVAL (cookie) & CALL_LONG) != 0
32448 && GET_CODE (func_desc) == SYMBOL_REF)
32449 {
32450 func = rs6000_longcall_ref (func_desc, tlsarg);
32451 /* If the longcall was implemented as an inline PLT call using
32452 PLT unspecs then func will be REG:r11. If not, func will be
32453 a pseudo reg. The inline PLT call sequence supports lazy
32454 linking (and longcalls to functions in dlopen'd libraries).
32455 The other style of longcalls don't. The lazy linking entry
32456 to the dynamic symbol resolver requires r11 be the function
32457 address (as it is for linker generated PLT stubs). Ensure
32458 r11 stays valid to the bctrl by marking r11 used by the call. */
32459 if (TARGET_PLTSEQ)
32460 abi_reg = func;
32461 }
32462
32463 /* Handle indirect calls. */
32464 if (GET_CODE (func) != SYMBOL_REF)
32465 {
32466 func = force_reg (Pmode, func);
32467
32468 /* Indirect calls via CTR are strongly preferred over indirect
32469 calls via LR, so move the address there. That can't be left
32470 to reload because we want to mark every instruction in an
32471 inline PLT call sequence with a reloc, enabling the linker to
32472 edit the sequence back to a direct call when that makes sense. */
32473 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32474 if (abi_reg)
32475 {
32476 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
32477 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32478 emit_insn (gen_rtx_SET (func_addr, mark_func));
32479 v = gen_rtvec (2, func_addr, func_desc);
32480 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32481 }
32482 else
32483 emit_move_insn (func_addr, func);
32484 }
32485 else
32486 func_addr = func;
32487
32488 /* Create the call. */
32489 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32490 if (value != NULL_RTX)
32491 call[0] = gen_rtx_SET (value, call[0]);
32492
32493 call[1] = gen_rtx_USE (VOIDmode, cookie);
32494 n = 2;
32495 if (TARGET_SECURE_PLT
32496 && flag_pic
32497 && GET_CODE (func_addr) == SYMBOL_REF
32498 && !SYMBOL_REF_LOCAL_P (func_addr))
32499 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
32500
32501 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
32502
32503 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
32504 insn = emit_call_insn (insn);
32505 if (abi_reg)
32506 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32507 }
32508
32509 /* Expand code to perform a sibling call under the SysV4 ABI. */
32510
32511 void
32512 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32513 {
32514 rtx func = func_desc;
32515 rtx func_addr;
32516 rtx call[3];
32517 rtx insn;
32518 rtx abi_reg = NULL_RTX;
32519
32520 if (global_tlsarg)
32521 tlsarg = global_tlsarg;
32522
32523 /* Handle longcall attributes. */
32524 if ((INTVAL (cookie) & CALL_LONG) != 0
32525 && GET_CODE (func_desc) == SYMBOL_REF)
32526 {
32527 func = rs6000_longcall_ref (func_desc, tlsarg);
32528 /* If the longcall was implemented as an inline PLT call using
32529 PLT unspecs then func will be REG:r11. If not, func will be
32530 a pseudo reg. The inline PLT call sequence supports lazy
32531 linking (and longcalls to functions in dlopen'd libraries).
32532 The other style of longcalls don't. The lazy linking entry
32533 to the dynamic symbol resolver requires r11 be the function
32534 address (as it is for linker generated PLT stubs). Ensure
32535 r11 stays valid to the bctr by marking r11 used by the call. */
32536 if (TARGET_PLTSEQ)
32537 abi_reg = func;
32538 }
32539
32540 /* Handle indirect calls. */
32541 if (GET_CODE (func) != SYMBOL_REF)
32542 {
32543 func = force_reg (Pmode, func);
32544
32545 /* Indirect sibcalls must go via CTR. That can't be left to
32546 reload because we want to mark every instruction in an inline
32547 PLT call sequence with a reloc, enabling the linker to edit
32548 the sequence back to a direct call when that makes sense. */
32549 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32550 if (abi_reg)
32551 {
32552 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
32553 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32554 emit_insn (gen_rtx_SET (func_addr, mark_func));
32555 v = gen_rtvec (2, func_addr, func_desc);
32556 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32557 }
32558 else
32559 emit_move_insn (func_addr, func);
32560 }
32561 else
32562 func_addr = func;
32563
32564 /* Create the call. */
32565 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32566 if (value != NULL_RTX)
32567 call[0] = gen_rtx_SET (value, call[0]);
32568
32569 call[1] = gen_rtx_USE (VOIDmode, cookie);
32570 call[2] = simple_return_rtx;
32571
32572 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
32573 insn = emit_call_insn (insn);
32574 if (abi_reg)
32575 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32576 }
32577
32578 #if TARGET_MACHO
32579
32580 /* Expand code to perform a call under the Darwin ABI.
32581 Modulo handling of mlongcall, this is much the same as sysv.
If/when the longcall optimisation is removed, we could drop this
32583 code and use the sysv case (taking care to avoid the tls stuff).
32584
32585 We can use this for sibcalls too, if needed. */
32586
32587 void
32588 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
32589 rtx cookie, bool sibcall)
32590 {
32591 rtx func = func_desc;
32592 rtx func_addr;
32593 rtx call[3];
32594 rtx insn;
32595 int cookie_val = INTVAL (cookie);
32596 bool make_island = false;
32597
/* Handle longcall attributes; there are two cases for Darwin:
1) Newer linkers are capable of synthesising any branch islands needed.
2) We need a helper branch island synthesised by the compiler.
The second case has mostly been retired and we don't use it for m64.
In fact, it is only an optimisation; we could just make an indirect call
as sysv does, but we keep it for backwards compatibility for now.
32604 If we're going to use this, then we need to keep the CALL_LONG bit set,
32605 so that we can pick up the special insn form later. */
32606 if ((cookie_val & CALL_LONG) != 0
32607 && GET_CODE (func_desc) == SYMBOL_REF)
32608 {
32609 /* FIXME: the longcall opt should not hang off picsymbol stubs. */
32610 if (darwin_picsymbol_stubs && TARGET_32BIT)
32611 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
32612 else
32613 {
32614 /* The linker is capable of doing this, but the user explicitly
32615 asked for -mlongcall, so we'll do the 'normal' version. */
32616 func = rs6000_longcall_ref (func_desc, NULL_RTX);
32617 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
32618 }
32619 }
32620
32621 /* Handle indirect calls. */
32622 if (GET_CODE (func) != SYMBOL_REF)
32623 {
32624 func = force_reg (Pmode, func);
32625
32626 /* Indirect calls via CTR are strongly preferred over indirect
32627 calls via LR, and are required for indirect sibcalls, so move
32628 the address there. */
32629 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32630 emit_move_insn (func_addr, func);
32631 }
32632 else
32633 func_addr = func;
32634
32635 /* Create the call. */
32636 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32637 if (value != NULL_RTX)
32638 call[0] = gen_rtx_SET (value, call[0]);
32639
32640 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
32641
32642 if (sibcall)
32643 call[2] = simple_return_rtx;
32644 else
32645 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
32646
32647 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
32648 insn = emit_call_insn (insn);
/* Now that we have the debug info in the insn, we can set up the branch
island if we're using one. */
32651 if (make_island)
32652 {
32653 tree funname = get_identifier (XSTR (func_desc, 0));
32654
32655 if (no_previous_def (funname))
32656 {
32657 rtx label_rtx = gen_label_rtx ();
32658 char *label_buf, temp_buf[256];
32659 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
32660 CODE_LABEL_NUMBER (label_rtx));
32661 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
32662 tree labelname = get_identifier (label_buf);
32663 add_compiler_branch_island (labelname, funname,
32664 insn_line ((const rtx_insn*)insn));
32665 }
32666 }
32667 }
32668 #endif
32669
32670 void
32671 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
32672 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
32673 {
32674 #if TARGET_MACHO
32675 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
32676 #else
32677 gcc_unreachable();
32678 #endif
32679 }
32680
32681
32682 void
32683 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
32684 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
32685 {
32686 #if TARGET_MACHO
32687 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
32688 #else
32689 gcc_unreachable();
32690 #endif
32691 }
32692
32693 /* Return whether we should generate PC-relative code for FNDECL. */
32694 bool
32695 rs6000_fndecl_pcrel_p (const_tree fndecl)
32696 {
32697 if (DEFAULT_ABI != ABI_ELFv2)
32698 return false;
32699
32700 struct cl_target_option *opts = target_opts_for_fn (fndecl);
32701
32702 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
32703 && TARGET_CMODEL == CMODEL_MEDIUM);
32704 }
32705
32706 /* Return whether we should generate PC-relative code for *FN. */
32707 bool
32708 rs6000_pcrel_p (struct function *fn)
32709 {
32710 if (DEFAULT_ABI != ABI_ELFv2)
32711 return false;
32712
32713 /* Optimize usual case. */
32714 if (fn == cfun)
32715 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
32716 && TARGET_CMODEL == CMODEL_MEDIUM);
32717
32718 return rs6000_fndecl_pcrel_p (fn->decl);
32719 }
32720
32721 #ifdef HAVE_GAS_HIDDEN
32722 # define USE_HIDDEN_LINKONCE 1
32723 #else
32724 # define USE_HIDDEN_LINKONCE 0
32725 #endif
32726
32727 /* Fills in the label name that should be used for a 476 link stack thunk. */
32728
32729 void
32730 get_ppc476_thunk_name (char name[32])
32731 {
32732 gcc_assert (TARGET_LINK_STACK);
32733
32734 if (USE_HIDDEN_LINKONCE)
32735 sprintf (name, "__ppc476.get_thunk");
32736 else
32737 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
32738 }
32739
32740 /* This function emits the simple thunk routine that is used to preserve
32741 the link stack on the 476 cpu. */
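
/* The thunk body is a single blr; the intended use is a sequence like
   this (a sketch):

	bl __ppc476.get_thunk	;; pushes a link stack entry, thunk returns
	mflr 30			;; LR now holds the address after the bl

   which keeps the hardware link stack balanced, unlike a bare
   "bcl 20,31,$+4" that pushes an entry that is never popped. */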
32742
32743 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
32744 static void
32745 rs6000_code_end (void)
32746 {
32747 char name[32];
32748 tree decl;
32749
32750 if (!TARGET_LINK_STACK)
32751 return;
32752
32753 get_ppc476_thunk_name (name);
32754
32755 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
32756 build_function_type_list (void_type_node, NULL_TREE));
32757 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
32758 NULL_TREE, void_type_node);
32759 TREE_PUBLIC (decl) = 1;
32760 TREE_STATIC (decl) = 1;
32761
32762 #if RS6000_WEAK
32763 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
32764 {
32765 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
32766 targetm.asm_out.unique_section (decl, 0);
32767 switch_to_section (get_named_section (decl, NULL, 0));
32768 DECL_WEAK (decl) = 1;
32769 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
32770 targetm.asm_out.globalize_label (asm_out_file, name);
32771 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
32772 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
32773 }
32774 else
32775 #endif
32776 {
32777 switch_to_section (text_section);
32778 ASM_OUTPUT_LABEL (asm_out_file, name);
32779 }
32780
32781 DECL_INITIAL (decl) = make_node (BLOCK);
32782 current_function_decl = decl;
32783 allocate_struct_function (decl, false);
32784 init_function_start (decl);
32785 first_function_block_is_cold = false;
32786 /* Make sure unwind info is emitted for the thunk if needed. */
32787 final_start_function (emit_barrier (), asm_out_file, 1);
32788
32789 fputs ("\tblr\n", asm_out_file);
32790
32791 final_end_function ();
32792 init_insn_lengths ();
32793 free_after_compilation (cfun);
32794 set_cfun (NULL);
32795 current_function_decl = NULL;
32796 }
32797
32798 /* Add r30 to hard reg set if the prologue sets it up and it is not
32799 pic_offset_table_rtx. */
32800
32801 static void
32802 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
32803 {
32804 if (!TARGET_SINGLE_PIC_BASE
32805 && TARGET_TOC
32806 && TARGET_MINIMAL_TOC
32807 && !constant_pool_empty_p ())
32808 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
32809 if (cfun->machine->split_stack_argp_used)
32810 add_to_hard_reg_set (&set->set, Pmode, 12);
32811
32812 /* Make sure the hard reg set doesn't include r2, which was possibly added
32813 via PIC_OFFSET_TABLE_REGNUM. */
32814 if (TARGET_TOC)
32815 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
32816 }
32817
32818 \f
32819 /* Helper function for rs6000_split_logical to emit a logical instruction after
splitting the operation to single GPR registers.
32821
32822 DEST is the destination register.
32823 OP1 and OP2 are the input source registers.
32824 CODE is the base operation (AND, IOR, XOR, NOT).
32825 MODE is the machine mode.
32826 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32827 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32828 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
32829
32830 static void
32831 rs6000_split_logical_inner (rtx dest,
32832 rtx op1,
32833 rtx op2,
32834 enum rtx_code code,
32835 machine_mode mode,
32836 bool complement_final_p,
32837 bool complement_op1_p,
32838 bool complement_op2_p)
32839 {
32840 rtx bool_rtx;
32841
32842 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
32843 if (op2 && CONST_INT_P (op2)
32844 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
32845 && !complement_final_p && !complement_op1_p && !complement_op2_p)
32846 {
32847 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
32848 HOST_WIDE_INT value = INTVAL (op2) & mask;
32849
32850 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
32851 if (code == AND)
32852 {
32853 if (value == 0)
32854 {
32855 emit_insn (gen_rtx_SET (dest, const0_rtx));
32856 return;
32857 }
32858
32859 else if (value == mask)
32860 {
32861 if (!rtx_equal_p (dest, op1))
32862 emit_insn (gen_rtx_SET (dest, op1));
32863 return;
32864 }
32865 }
32866
32867 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
into separate ORI/ORIS or XORI/XORIS instructions. */
32869 else if (code == IOR || code == XOR)
32870 {
32871 if (value == 0)
32872 {
32873 if (!rtx_equal_p (dest, op1))
32874 emit_insn (gen_rtx_SET (dest, op1));
32875 return;
32876 }
32877 }
32878 }
32879
32880 if (code == AND && mode == SImode
32881 && !complement_final_p && !complement_op1_p && !complement_op2_p)
32882 {
32883 emit_insn (gen_andsi3 (dest, op1, op2));
32884 return;
32885 }
32886
32887 if (complement_op1_p)
32888 op1 = gen_rtx_NOT (mode, op1);
32889
32890 if (complement_op2_p)
32891 op2 = gen_rtx_NOT (mode, op2);
32892
32893 /* For canonical RTL, if only one arm is inverted it is the first. */
32894 if (!complement_op1_p && complement_op2_p)
32895 std::swap (op1, op2);
32896
32897 bool_rtx = ((code == NOT)
32898 ? gen_rtx_NOT (mode, op1)
32899 : gen_rtx_fmt_ee (code, mode, op1, op2));
32900
32901 if (complement_final_p)
32902 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
32903
32904 emit_insn (gen_rtx_SET (dest, bool_rtx));
32905 }
32906
32907 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
32908 operations are split immediately during RTL generation to allow for more
32909 optimizations of the AND/IOR/XOR.
32910
32911 OPERANDS is an array containing the destination and two input operands.
32912 CODE is the base operation (AND, IOR, XOR, NOT).
If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
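
/* For example (illustrative): on a 32-bit target, a DImode XOR with the
   constant 0x12345678 is split into two SImode operations: the high
   halves are XORed with 0 (which degenerates to a plain move), and the
   low halves are XORed with 0x12345678, which the loop below further
   splits into an XORIS of 0x1234 and an XORI of 0x5678. */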
32919
32920 static void
32921 rs6000_split_logical_di (rtx operands[3],
32922 enum rtx_code code,
32923 bool complement_final_p,
32924 bool complement_op1_p,
32925 bool complement_op2_p)
32926 {
32927 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
32928 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
32929 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
32930 enum hi_lo { hi = 0, lo = 1 };
32931 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
32932 size_t i;
32933
32934 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
32935 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
32936 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
32937 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
32938
32939 if (code == NOT)
32940 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
32941 else
32942 {
32943 if (!CONST_INT_P (operands[2]))
32944 {
32945 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
32946 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
32947 }
32948 else
32949 {
32950 HOST_WIDE_INT value = INTVAL (operands[2]);
32951 HOST_WIDE_INT value_hi_lo[2];
32952
32953 gcc_assert (!complement_final_p);
32954 gcc_assert (!complement_op1_p);
32955 gcc_assert (!complement_op2_p);
32956
32957 value_hi_lo[hi] = value >> 32;
32958 value_hi_lo[lo] = value & lower_32bits;
32959
32960 for (i = 0; i < 2; i++)
32961 {
32962 HOST_WIDE_INT sub_value = value_hi_lo[i];
32963
32964 if (sub_value & sign_bit)
32965 sub_value |= upper_32bits;
32966
32967 op2_hi_lo[i] = GEN_INT (sub_value);
32968
32969 /* If this is an AND instruction, check to see if we need to load
32970 the value in a register. */
32971 if (code == AND && sub_value != -1 && sub_value != 0
32972 && !and_operand (op2_hi_lo[i], SImode))
32973 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
32974 }
32975 }
32976 }
32977
32978 for (i = 0; i < 2; i++)
32979 {
32980 /* Split large IOR/XOR operations. */
32981 if ((code == IOR || code == XOR)
32982 && CONST_INT_P (op2_hi_lo[i])
32983 && !complement_final_p
32984 && !complement_op1_p
32985 && !complement_op2_p
32986 && !logical_const_operand (op2_hi_lo[i], SImode))
32987 {
32988 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
32989 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
32990 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
32991 rtx tmp = gen_reg_rtx (SImode);
32992
32993 /* Make sure the constant is sign extended. */
32994 if ((hi_16bits & sign_bit) != 0)
32995 hi_16bits |= upper_32bits;
32996
32997 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
32998 code, SImode, false, false, false);
32999
33000 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
33001 code, SImode, false, false, false);
33002 }
33003 else
33004 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
33005 code, SImode, complement_final_p,
33006 complement_op1_p, complement_op2_p);
33007 }
33008
33009 return;
33010 }
33011
33012 /* Split the insns that make up boolean operations operating on multiple GPR
33013 registers. The boolean MD patterns ensure that the inputs either are
33014 exactly the same as the output registers, or there is no overlap.
33015
33016 OPERANDS is an array containing the destination and two input operands.
33017 CODE is the base operation (AND, IOR, XOR, NOT).
33018 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33019 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33020 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33021
33022 void
33023 rs6000_split_logical (rtx operands[3],
33024 enum rtx_code code,
33025 bool complement_final_p,
33026 bool complement_op1_p,
33027 bool complement_op2_p)
33028 {
33029 machine_mode mode = GET_MODE (operands[0]);
33030 machine_mode sub_mode;
33031 rtx op0, op1, op2;
33032 int sub_size, regno0, regno1, nregs, i;
33033
33034 /* If this is DImode, use the specialized version that can run before
33035 register allocation. */
33036 if (mode == DImode && !TARGET_POWERPC64)
33037 {
33038 rs6000_split_logical_di (operands, code, complement_final_p,
33039 complement_op1_p, complement_op2_p);
33040 return;
33041 }
33042
33043 op0 = operands[0];
33044 op1 = operands[1];
33045 op2 = (code == NOT) ? NULL_RTX : operands[2];
33046 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
33047 sub_size = GET_MODE_SIZE (sub_mode);
33048 regno0 = REGNO (op0);
33049 regno1 = REGNO (op1);
33050
33051 gcc_assert (reload_completed);
33052 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33053 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33054
33055 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
33056 gcc_assert (nregs > 1);
33057
33058 if (op2 && REG_P (op2))
33059 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
33060
33061 for (i = 0; i < nregs; i++)
33062 {
33063 int offset = i * sub_size;
33064 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
33065 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
33066 rtx sub_op2 = ((code == NOT)
33067 ? NULL_RTX
33068 : simplify_subreg (sub_mode, op2, mode, offset));
33069
33070 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
33071 complement_final_p, complement_op1_p,
33072 complement_op2_p);
33073 }
33074
33075 return;
33076 }
33077
33078 \f
/* Return true if the peephole2 can combine an addis instruction and a load
with an offset that can be fused together on a power8. */
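
/* For example (a sketch of a fuseable pair, using medium code model TOC
   addressing):

	addis 9,2,.LC0@toc@ha	;; ADDIS_REG is r9, ADDIS_VALUE the HIGH part
	lwz 3,.LC0@toc@l(9)	;; TARGET is r3, the MEM uses a LO_SUM address

   the checks below verify that the two instructions can be combined
   safely. */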
33082
33083 bool
33084 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
33085 rtx addis_value, /* addis value. */
33086 rtx target, /* target register that is loaded. */
33087 rtx mem) /* bottom part of the memory addr. */
33088 {
33089 rtx addr;
33090 rtx base_reg;
33091
33092 /* Validate arguments. */
33093 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
33094 return false;
33095
33096 if (!base_reg_operand (target, GET_MODE (target)))
33097 return false;
33098
33099 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33100 return false;
33101
33102 /* Allow sign/zero extension. */
33103 if (GET_CODE (mem) == ZERO_EXTEND
33104 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33105 mem = XEXP (mem, 0);
33106
33107 if (!MEM_P (mem))
33108 return false;
33109
33110 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33111 return false;
33112
33113 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33114 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33115 return false;
33116
33117 /* Validate that the register used to load the high value is either the
33118 register being loaded, or we can safely replace its use.
33119
This function is only called from the peephole2 pass and we assume that
there are 2 instructions in the peephole (addis and load), so we want to
check that the target register is not used in the memory address and that
the register holding the addis result is dead after the peephole. */
33124 if (REGNO (addis_reg) != REGNO (target))
33125 {
33126 if (reg_mentioned_p (target, mem))
33127 return false;
33128
33129 if (!peep2_reg_dead_p (2, addis_reg))
33130 return false;
33131
33132 /* If the target register being loaded is the stack pointer, we must
33133 avoid loading any other value into it, even temporarily. */
33134 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33135 return false;
33136 }
33137
33138 base_reg = XEXP (addr, 0);
33139 return REGNO (addis_reg) == REGNO (base_reg);
33140 }
33141
33142 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33143 sequence. We adjust the addis register to use the target register. If the
load sign extends, we adjust the code to do the zero-extending load, and an
explicit sign extension later, since the fusion only covers zero-extending
loads.
33147
33148 The operands are:
33149 operands[0] register set with addis (to be replaced with target)
33150 operands[1] value set via addis
33151 operands[2] target register being loaded
33152 operands[3] D-form memory reference using operands[0]. */
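
/* For example (a sketch): a sign-extending fused load is rewritten as the
   zero-extending pair plus an explicit extension, reusing the target
   register throughout:

	addis 9,2,.LC0@toc@ha
	lwz 9,.LC0@toc@l(9)
	extsw 9,9

   since the power8 fusion only covers zero-extending loads. */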

void
expand_fusion_gpr_load (rtx *operands)
{
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx orig_mem = operands[3];
  rtx new_addr, new_mem, orig_addr, offset;
  enum rtx_code plus_or_lo_sum;
  machine_mode target_mode = GET_MODE (target);
  machine_mode extend_mode = target_mode;
  machine_mode ptr_mode = Pmode;
  enum rtx_code extend = UNKNOWN;

  if (GET_CODE (orig_mem) == ZERO_EXTEND
      || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
    {
      extend = GET_CODE (orig_mem);
      orig_mem = XEXP (orig_mem, 0);
      target_mode = GET_MODE (orig_mem);
    }

  gcc_assert (MEM_P (orig_mem));

  orig_addr = XEXP (orig_mem, 0);
  plus_or_lo_sum = GET_CODE (orig_addr);
  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);

  offset = XEXP (orig_addr, 1);
  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);

  if (extend != UNKNOWN)
    new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);

  new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
			    UNSPEC_FUSION_GPR);
  emit_insn (gen_rtx_SET (target, new_mem));

  if (extend == SIGN_EXTEND)
    {
      int sub_off = ((BYTES_BIG_ENDIAN)
		     ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
		     : 0);
      rtx sign_reg
	= simplify_subreg (target_mode, target, extend_mode, sub_off);

      emit_insn (gen_rtx_SET (target,
			      gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
    }

  return;
}

/* Emit the addis instruction that will be part of a fused instruction
   sequence.  */
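
/* Depending on the form of ADDIS_VALUE, this prints one of (operand numbers
   illustrative):

	lis %0,%v1			constant high part
	addis %0,%1,%v2			register plus constant
	addis %0,%2,%1@toc@ha		TOC-relative symbol (ELF)
	addis %0,%1@u(%2)		TOC-relative symbol (XCOFF)  */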

void
emit_fusion_addis (rtx target, rtx addis_value)
{
  rtx fuse_ops[10];
  const char *addis_str = NULL;

  /* Emit the addis instruction.  */
  fuse_ops[0] = target;
  if (satisfies_constraint_L (addis_value))
    {
      fuse_ops[1] = addis_value;
      addis_str = "lis %0,%v1";
    }

  else if (GET_CODE (addis_value) == PLUS)
    {
      rtx op0 = XEXP (addis_value, 0);
      rtx op1 = XEXP (addis_value, 1);

      if (REG_P (op0) && CONST_INT_P (op1)
	  && satisfies_constraint_L (op1))
	{
	  fuse_ops[1] = op0;
	  fuse_ops[2] = op1;
	  addis_str = "addis %0,%1,%v2";
	}
    }

  else if (GET_CODE (addis_value) == HIGH)
    {
      rtx value = XEXP (addis_value, 0);
      if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
	{
	  fuse_ops[1] = XVECEXP (value, 0, 0);	/* symbol ref.  */
	  fuse_ops[2] = XVECEXP (value, 0, 1);	/* TOC register.  */
	  if (TARGET_ELF)
	    addis_str = "addis %0,%2,%1@toc@ha";

	  else if (TARGET_XCOFF)
	    addis_str = "addis %0,%1@u(%2)";

	  else
	    gcc_unreachable ();
	}

      else if (GET_CODE (value) == PLUS)
	{
	  rtx op0 = XEXP (value, 0);
	  rtx op1 = XEXP (value, 1);

	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_TOCREL
	      && CONST_INT_P (op1))
	    {
	      fuse_ops[1] = XVECEXP (op0, 0, 0);	/* symbol ref.  */
	      fuse_ops[2] = XVECEXP (op0, 0, 1);	/* TOC register.  */
	      fuse_ops[3] = op1;
	      if (TARGET_ELF)
		addis_str = "addis %0,%2,%1+%3@toc@ha";

	      else if (TARGET_XCOFF)
		addis_str = "addis %0,%1+%3@u(%2)";

	      else
		gcc_unreachable ();
	    }
	}

      else if (satisfies_constraint_L (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%v1";
	}

      else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%1@ha";
	}
    }

  if (!addis_str)
    fatal_insn ("Could not generate addis value for fusion", addis_value);

  output_asm_insn (addis_str, fuse_ops);
}

/* Emit a D-form load or store instruction that is the second instruction
   of a fusion sequence.  */
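
/* For instance, with INSN_STR "lwz" and a constant offset this prints
   "lwz %0,%2(%1)"; with a TOC-relative offset on ELF it prints
   "lwz %0,%2@toc@l(%1)" (operand numbers illustrative).  */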

static void
emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
{
  rtx fuse_ops[10];
  char insn_template[80];

  fuse_ops[0] = load_reg;
  fuse_ops[1] = addis_reg;

  if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
    {
      sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == UNSPEC
	   && XINT (offset, 1) == UNSPEC_TOCREL)
    {
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (offset, 0, 0);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == PLUS
	   && GET_CODE (XEXP (offset, 0)) == UNSPEC
	   && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
	   && CONST_INT_P (XEXP (offset, 1)))
    {
      rtx tocrel_unspec = XEXP (offset, 0);
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
      fuse_ops[3] = XEXP (offset, 1);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
    {
      sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else
    fatal_insn ("Unable to generate load/store offset for fusion", offset);

  return;
}

/* Given an address, convert it into the addis and load offset parts.  Addresses
   created during the peephole2 process look like:
	(lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
		(unspec [(...)] UNSPEC_TOCREL))  */
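
/* So for the address shape above, *P_HI receives the (high ...) operand that
   feeds the addis and *P_LO receives the low-part unspec that becomes the
   load offset.  */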

static void
fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
{
  rtx hi, lo;

  if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
    {
      hi = XEXP (addr, 0);
      lo = XEXP (addr, 1);
    }
  else
    gcc_unreachable ();

  *p_hi = hi;
  *p_lo = lo;
}

/* Return a string to fuse an addis instruction with a gpr load into the same
   register that the addis instruction set up.  The address that is used is
   the logical address that was formed during peephole2:
	(lo_sum (high) (low-part))

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */
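
/* For an SImode TOC-relative load on ELF this prints a pair such as
   (registers and symbol illustrative):

	addis 9,2,sym@toc@ha
	lwz 9,sym@toc@l(9)

   Both instructions target the same register, which is what allows the
   power8 front end to fuse them.  */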

const char *
emit_fusion_gpr_load (rtx target, rtx mem)
{
  rtx addis_value;
  rtx addr;
  rtx load_offset;
  const char *load_str = NULL;
  machine_mode mode;

  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  gcc_assert (REG_P (target) && MEM_P (mem));

  addr = XEXP (mem, 0);
  fusion_split_address (addr, &addis_value, &load_offset);

  /* Now emit the load instruction to the same register.  */
  mode = GET_MODE (mem);
  switch (mode)
    {
    case E_QImode:
      load_str = "lbz";
      break;

    case E_HImode:
      load_str = "lhz";
      break;

    case E_SImode:
    case E_SFmode:
      load_str = "lwz";
      break;

    case E_DImode:
    case E_DFmode:
      gcc_assert (TARGET_POWERPC64);
      load_str = "ld";
      break;

    default:
      fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
    }

  /* Emit the addis instruction.  */
  emit_fusion_addis (target, addis_value);

  /* Emit the D-form load instruction.  */
  emit_fusion_load (target, target, load_offset, load_str);

  return "";
}
\f

#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var):

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);
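
/* In the low word of the FPSCR, the two low-order bits hold the rounding
   mode and the next bit is the non-IEEE (NI) mode bit, so the 0x7 above
   preserves exactly those fields (assuming the standard FPSCR layout).  */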

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear the entire lower word of the FPSCR, including all of the
     exception bits.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var):

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
				(*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

void
rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after
     the vmrgew instruction will be correct.  */
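  /* As a sketch of the permutes below: on big-endian, xxpermdi with
     immediate 0 forms { src1[0], src2[0] } and immediate 3 forms
     { src1[1], src2[1] }, so the later vmrgew interleaves the converted
     elements back into source order.  */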
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
					   GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
					   GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_vsx_xvcdpsp (rtx_tmp2, rtx_tmp0));
  emit_insn (gen_vsx_xvcdpsp (rtx_tmp3, rtx_tmp1));

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

void
rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DImode);
  rtx_tmp1 = gen_reg_rtx (V2DImode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after
     the vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
    }

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

void
rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
			       rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));

  rtx_tmp2 = gen_reg_rtx (V4SImode);
  rtx_tmp3 = gen_reg_rtx (V4SImode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
    }

  emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
}

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && (STRICT_ALIGNMENT || !optimize_size))
    return MAX (align, BITS_PER_WORD);
  return align;
}

/* Implement TARGET_STARTING_FRAME_OFFSET.  */

static HOST_WIDE_INT
rs6000_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return RS6000_STARTING_FRAME_OFFSET;
}
\f

/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128).  This is called
   via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME.  */
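
/* For example (hypothetical signature), a function foo (__float128) is
   mangled _Z3foou9__ieee128 today but was _Z3fooU10__float128 in GCC 8.1,
   so the code below emits:

	.weak _Z3fooU10__float128
	.set _Z3fooU10__float128,_Z3foou9__ieee128  */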

#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE *stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
    {
      tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
      const char *old_name;

      ieee128_mangling_gcc_8_1 = true;
      lang_hooks.set_decl_assembler_name (decl);
      old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
      ieee128_mangling_gcc_8_1 = false;

      if (strcmp (name, old_name) != 0)
	{
	  fprintf (stream, "\t.weak %s\n", old_name);
	  fprintf (stream, "\t.set %s,%s\n", old_name, name);
	}
    }
}
#endif

\f
/* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
   function names from <foo>l to <foo>f128 if the default long double type is
   IEEE 128-bit.  Typically, with the C and C++ languages, the standard math.h
   include file switches the names on systems that support long double as IEEE
   128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
   In the future, glibc will export names like __ieee128_sinf128 and we can
   switch to using those instead of sinf128, which pollutes the user's
   namespace.

   This switches the names of the Fortran math functions as well (Fortran does
   not use math.h).  However, Fortran needs other changes to the compiler and
   library before you can switch the real*16 type at compile time.

   We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name.  We
   only do this if the default is that long double is IBM extended double, and
   the user asked for IEEE 128-bit.  */
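
/* For instance, a built-in call to sinl whose return type has TFmode (IEEE
   128-bit under these options) is renamed from sinl to sinf128 by the code
   below.  */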

static tree
rs6000_mangle_decl_assembler_name (tree decl, tree id)
{
  if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
      && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl))
    {
      size_t len = IDENTIFIER_LENGTH (id);
      const char *name = IDENTIFIER_POINTER (id);

      if (name[len - 1] == 'l')
	{
	  bool uses_ieee128_p = false;
	  tree type = TREE_TYPE (decl);
	  machine_mode ret_mode = TYPE_MODE (type);

	  /* See if the function returns an IEEE 128-bit floating point type
	     or complex type.  */
	  if (ret_mode == TFmode || ret_mode == TCmode)
	    uses_ieee128_p = true;
	  else
	    {
	      function_args_iterator args_iter;
	      tree arg;

	      /* See if the function passes an IEEE 128-bit floating point
		 type or complex type.  */
	      FOREACH_FUNCTION_ARGS (type, arg, args_iter)
		{
		  machine_mode arg_mode = TYPE_MODE (arg);
		  if (arg_mode == TFmode || arg_mode == TCmode)
		    {
		      uses_ieee128_p = true;
		      break;
		    }
		}
	    }

	  /* If we passed or returned an IEEE 128-bit floating point type,
	     change the name.  */
	  if (uses_ieee128_p)
	    {
	      char *name2 = (char *) alloca (len + 4);
	      memcpy (name2, name, len - 1);
	      strcpy (name2 + len - 1, "f128");
	      id = get_identifier (name2);
	    }
	}
    }

  return id;
}

/* Predict whether the given loop in gimple will be transformed in the RTL
   doloop_optimize pass.  */

static bool
rs6000_predict_doloop_p (struct loop *loop)
{
  gcc_assert (loop);

  /* On rs6000, targetm.can_use_doloop_p is actually
     can_use_doloop_if_innermost.  Just ensure the loop is innermost.  */
  if (loop->inner != NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predict doloop failure due to"
			    " loop nesting.\n");
      return false;
    }

  return true;
}

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"